/* Autogenerated file, DO NOT EDIT manually! generated by gen_perf.py
 *
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdint.h>
#include <stdbool.h>

#include <drm-uapi/i915_drm.h>

#include "util/hash_table.h"
#include "util/ralloc.h"

#include "intel_perf_metrics.h"
#include "perf/intel_perf.h"


/* Two-argument minimum / maximum.
 *
 * Every use of an argument is parenthesized so that arguments containing
 * operators of lower precedence than the relational operators (e.g. `x & y`)
 * are compared as a whole.  The selected argument is evaluated twice, so do
 * not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))



/* Render Metrics Basic set :: GPU Time Elapsed
 *
 * Scales the accumulated GPU_TIME value (accumulator slot at
 * query->gpu_time_offset + 0) by 1000000000 and divides it by
 * perf->sys_vars.timestamp_frequency.  Presumably this yields nanoseconds,
 * assuming the frequency is expressed in Hz -- confirm against the code that
 * fills sys_vars.
 *
 * Returns 0 when the timestamp frequency is 0.
 */
static uint64_t
hsw__render_basic__gpu_time__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_TIME 0 READ 1000000000 UMUL $GpuTimestampFrequency UDIV */
   const uint64_t ticks = results->accumulator[query->gpu_time_offset + 0];
   const uint64_t freq = perf->sys_vars.timestamp_frequency;

   if (freq == 0) {
      return 0;
   }

   /* Computing ticks * 1000000000 directly wraps modulo 2^64 once ticks
    * exceeds roughly 1.8e10.  Splitting ticks into whole multiples of the
    * frequency and a remainder gives the same floor(ticks * 1e9 / freq)
    * result while only multiplying values no larger than the original
    * operand, so it never wraps where the direct form would not.
    */
   const uint64_t whole = ticks / freq;
   const uint64_t remainder = ticks % freq;

   return whole * 1000000000 + (remainder * 1000000000) / freq;
}

/* Render Metrics Basic set :: GPU Core Clocks
 *
 * Returns the accumulated value stored at query->c_offset + 2
 * (C counter 2 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__gpu_core_clocks__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 2 READ */
   return results->accumulator[query->c_offset + 2];
}

/* Render Metrics Basic set :: AVG GPU Core Frequency
 *
 * Multiplies the GPU core clock count by 1000000000 and divides it by the
 * GPU time metric.  Returns 0 when the GPU time metric is 0.
 *
 * The multiplication is plain 64-bit unsigned arithmetic, so it wraps
 * modulo 2^64 for very large clock counts.
 */
static uint64_t
hsw__render_basic__avg_gpu_core_frequency__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * 1000000000;
   const uint64_t gpu_time =
      hsw__render_basic__gpu_time__read(perf, query, results);

   if (gpu_time == 0) {
      return 0;
   }

   return scaled_clocks / gpu_time;
}

/* Render Metrics Basic set :: AVG GPU Core Frequency
 *
 * Upper bound for the metric: simply reports perf->sys_vars.gt_max_freq.
 */
static uint64_t
hsw__render_basic__avg_gpu_core_frequency__max(struct intel_perf_config *perf)
{
   /* RPN equation: $GpuMaxFrequency */
   const uint64_t max_freq = perf->sys_vars.gt_max_freq;

   return max_freq;
}

/* Render Metrics Basic set :: VS Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 5
 * (A counter 5 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__vs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 5 READ */
   return results->accumulator[query->a_offset + 5];
}

/* Render Metrics Basic set :: TCS Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 10
 * (A counter 10 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__hs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 10 READ */
   return results->accumulator[query->a_offset + 10];
}

/* Render Metrics Basic set :: TES Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 15
 * (A counter 15 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__ds_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 15 READ */
   return results->accumulator[query->a_offset + 15];
}

/* Render Metrics Basic set :: GS Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 25
 * (A counter 25 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__gs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 25 READ */
   return results->accumulator[query->a_offset + 25];
}

/* Render Metrics Basic set :: FS Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 30
 * (A counter 30 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__ps_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 30 READ */
   return results->accumulator[query->a_offset + 30];
}

/* Render Metrics Basic set :: CS Threads Dispatched
 *
 * Returns the accumulated value stored at query->a_offset + 20
 * (A counter 20 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__cs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 20 READ */
   return results->accumulator[query->a_offset + 20];
}

/* Render Metrics Basic set :: GPU Busy
 *
 * Multiplies the accumulated value at query->a_offset + 41 by 100 (as a
 * 64-bit unsigned integer) and divides it, in floating point, by the GPU
 * core clock count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__gpu_busy__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 41];
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);

   if (core_clocks == 0) {
      return 0;
   }

   return (double)(counter * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 0 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__eu_active__read(UNUSED struct intel_perf_config *perf,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 0];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 1 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__eu_stall__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 1];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: VS EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 2 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__vs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 2];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: VS AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 2 by the VS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__vs_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ $VsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 2];
   const uint64_t thread_count =
      hsw__render_basic__vs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: VS AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 3 by the VS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__vs_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 3 READ $VsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 3];
   const uint64_t thread_count =
      hsw__render_basic__vs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: VS EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 3 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__vs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 3];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: TCS EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 7 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__hs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 7];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: TCS AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 7 by the TCS
 * (HS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__hs_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ $HsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 7];
   const uint64_t thread_count =
      hsw__render_basic__hs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: TCS AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 8 by the TCS
 * (HS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__hs_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 8 READ $HsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 8];
   const uint64_t thread_count =
      hsw__render_basic__hs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: TCS EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 8 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__hs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 8];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: TES EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 12 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__ds_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 12];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: TES AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 12 by the TES
 * (DS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__ds_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ $DsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 12];
   const uint64_t thread_count =
      hsw__render_basic__ds_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: TES AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 13 by the TES
 * (DS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__ds_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 13 READ $DsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 13];
   const uint64_t thread_count =
      hsw__render_basic__ds_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: TES EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 13 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__ds_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 13];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: GS EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 22 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__gs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 22];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: GS AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 22 by the GS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__gs_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 22 READ $GsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 22];
   const uint64_t thread_count =
      hsw__render_basic__gs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: GS AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 23 by the GS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__gs_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 23 READ $GsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 23];
   const uint64_t thread_count =
      hsw__render_basic__gs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: GS EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 23 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__gs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 23];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: CS EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 17 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__cs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 17];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: CS AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 17 by the CS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__cs_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ $CsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 17];
   const uint64_t thread_count =
      hsw__render_basic__cs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: CS AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 18 by the CS
 * thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__cs_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 18 READ $CsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 18];
   const uint64_t thread_count =
      hsw__render_basic__cs_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: CS EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 18 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__cs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 18];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: FS EU Active
 *
 * Integer-divides the accumulated value at query->a_offset + 27 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__ps_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 27];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: FS AVG Active per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 27 by the FS
 * (PS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__ps_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ $PsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 27];
   const uint64_t thread_count =
      hsw__render_basic__ps_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: FS AVG Stall per Thread
 *
 * Integer-divides the accumulated value at query->a_offset + 28 by the FS
 * (PS) thread count.  Returns 0 when the thread count is 0.
 */
static uint64_t
hsw__render_basic__ps_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 28 READ $PsThreads UDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 28];
   const uint64_t thread_count =
      hsw__render_basic__ps_threads__read(perf, query, results);

   if (thread_count == 0) {
      return 0;
   }

   return counter / thread_count;
}

/* Render Metrics Basic set :: FS EU Stall
 *
 * Integer-divides the accumulated value at query->a_offset + 28 by
 * perf->sys_vars.n_eus (the quotient is 0 when n_eus is 0), multiplies the
 * quotient by 100, then divides in floating point by the GPU core clock
 * count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__ps_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->a_offset + 28];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0) {
      per_eu = counter / eu_count;
   }

   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0) {
      return 0;
   }

   return (double)(per_eu * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: Sampler 0 Busy
 *
 * Multiplies the accumulated value at query->b_offset + 0 by 100 (as a
 * 64-bit unsigned integer) and divides it, in floating point, by the GPU
 * core clock count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__sampler0_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->b_offset + 0];
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);

   if (core_clocks == 0) {
      return 0;
   }

   return (double)(counter * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: Sampler 1 Busy
 *
 * Multiplies the accumulated value at query->b_offset + 1 by 100 (as a
 * 64-bit unsigned integer) and divides it, in floating point, by the GPU
 * core clock count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__sampler1_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->b_offset + 1];
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);

   if (core_clocks == 0) {
      return 0;
   }

   return (double)(counter * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: Samplers Busy
 *
 * Adds the accumulated values at query->b_offset + 0 and + 1, divides the
 * sum in floating point by the GPU core clock count (treating the ratio as
 * 0 when the clock count is 0), then halves the ratio and multiplies it
 * by 100.
 */
static float
hsw__render_basic__samplers_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL */
   const uint64_t first = results->accumulator[query->b_offset + 0];
   const uint64_t second = results->accumulator[query->b_offset + 1];
   const uint64_t combined = first + second;
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (core_clocks != 0) {
      ratio = (double)combined / (double)core_clocks;
   }

   /* The divisor 2 is a non-zero constant, so no zero guard is needed. */
   const double halved = ratio / 2;

   return halved * 100;
}

/* Render Metrics Basic set :: Sampler 0 Bottleneck
 *
 * Multiplies the accumulated value at query->b_offset + 2 by 100 (as a
 * 64-bit unsigned integer) and divides it, in floating point, by the GPU
 * core clock count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__sampler0_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->b_offset + 2];
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);

   if (core_clocks == 0) {
      return 0;
   }

   return (double)(counter * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: Sampler 1 Bottleneck
 *
 * Multiplies the accumulated value at query->b_offset + 3 by 100 (as a
 * 64-bit unsigned integer) and divides it, in floating point, by the GPU
 * core clock count.  Returns 0 when the core clock count is 0.
 */
static float
hsw__render_basic__sampler1_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t counter = results->accumulator[query->b_offset + 3];
   const uint64_t core_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results);

   if (core_clocks == 0) {
      return 0;
   }

   return (double)(counter * 100) / (double)core_clocks;
}

/* Render Metrics Basic set :: Sampler 0 Texels LOD0
 *
 * Returns the accumulated value at query->b_offset + 4 multiplied by 4.
 */
static uint64_t
hsw__render_basic__sampler0_texels__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ 4 UMUL */
   const uint64_t counter = results->accumulator[query->b_offset + 4];

   return counter * 4;
}

/* Render Metrics Basic set :: Sampler 1 Texels LOD0
 *
 * Returns the accumulated value at query->b_offset + 5 multiplied by 4.
 */
static uint64_t
hsw__render_basic__sampler1_texels__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: B 5 READ 4 UMUL */
   const uint64_t counter = results->accumulator[query->b_offset + 5];

   return counter * 4;
}

/* Render Metrics Basic set :: Sampler Texels LOD0
 *
 * Sums the Sampler 0 and Sampler 1 texel metrics and multiplies the sum by
 * perf->sys_vars.n_eu_slices.
 */
static uint64_t
hsw__render_basic__sampler_texels__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: $Sampler0Texels $Sampler1Texels UADD $EuSlicesTotalCount UMUL */
   const uint64_t sampler0 =
      hsw__render_basic__sampler0_texels__read(perf, query, results);
   const uint64_t sampler1 =
      hsw__render_basic__sampler1_texels__read(perf, query, results);
   const uint64_t both = sampler0 + sampler1;

   return both * perf->sys_vars.n_eu_slices;
}

/* Render Metrics Basic set :: L3 Sampler Throughput
 *
 * Adds the accumulated values at query->b_offset + 6 and + 7, then
 * multiplies the sum by 2, by perf->sys_vars.n_eu_slices and by 64, in
 * that order, using 64-bit unsigned arithmetic.
 */
static uint64_t
hsw__render_basic__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   /* RPN equation: B 6 READ B 7 READ UADD 2 UMUL $EuSlicesTotalCount UMUL 64 UMUL */
   const uint64_t first = results->accumulator[query->b_offset + 6];
   const uint64_t second = results->accumulator[query->b_offset + 7];
   const uint64_t doubled_sum = (first + second) * 2;
   const uint64_t scaled_by_slices = doubled_sum * perf->sys_vars.n_eu_slices;

   return scaled_by_slices * 64;
}

/* Render Metrics Basic set :: Early Hi-Depth Test Fails
 *
 * Returns the accumulated value stored at query->a_offset + 33
 * (A counter 33 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__hi_depth_test_fails__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: A 33 READ */
   return results->accumulator[query->a_offset + 33];
}

/* Render Metrics Basic set :: Early Depth Test Fails
 *
 * Returns the accumulated value stored at query->a_offset + 35
 * (A counter 35 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__early_depth_test_fails__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: A 35 READ */
   return results->accumulator[query->a_offset + 35];
}

/* Render Metrics Basic set :: Samples Killed in FS
 *
 * Returns the accumulated value stored at query->a_offset + 36
 * (A counter 36 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__samples_killed_in_ps__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: A 36 READ */
   return results->accumulator[query->a_offset + 36];
}

/* Render Metrics Basic set :: Alpha Test Fails
 *
 * Returns the accumulated value stored at query->a_offset + 37
 * (A counter 37 in the RPN equation), unmodified.
 */
static uint64_t
hsw__render_basic__alpha_test_fails__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: A 37 READ */
   return results->accumulator[query->a_offset + 37];
}

/* Render Metrics Basic set :: Late Stencil Test Fails
 *
 * Returns accumulator slot A38 as-is. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__post_ps_stencil_test_fails__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 38 READ */
   return results->accumulator[query->a_offset + 38];
}

/* Render Metrics Basic set :: Late Depth Test Fails
 *
 * Returns accumulator slot A39 minus the "Samples Killed in FS" metric.
 * The subtraction is a plain unsigned one (USUB), with no clamping.
 */
static uint64_t
hsw__render_basic__post_ps_depth_test_fails__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 39 READ $SamplesKilledInPs USUB */
   const uint64_t a39 = results->accumulator[query->a_offset + 39];

   return a39 - hsw__render_basic__samples_killed_in_ps__read(perf, query, results);
}

/* Render Metrics Basic set :: Samples Written
 *
 * Returns accumulator slot A40 as-is. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__samples_written__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 40 READ */
   return results->accumulator[query->a_offset + 40];
}

/* Render Metrics Basic set :: Samples Blended
 *
 * Returns accumulator slot C5 multiplied by (n_eu_slices * 4), where
 * n_eu_slices comes from perf->sys_vars ($EuSlicesTotalCount).
 */
static uint64_t
hsw__render_basic__samples_blended__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ $EuSlicesTotalCount 4 UMUL UMUL */
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   /* The scale factor is formed first, exactly as the RPN stack does. */
   const uint64_t scale = perf->sys_vars.n_eu_slices * 4;

   return c5 * scale;
}

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput
 *
 * Returns accumulator slot C1 multiplied by 64. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_vf_throughput__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 1 READ 64 UMUL */
   const uint64_t c1 = results->accumulator[query->c_offset + 1];

   return c1 * 64;
}

/* Render Metrics Basic set :: GTI Depth Throughput
 *
 * Returns accumulator slot C0 multiplied by 64. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_depth_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: C 0 READ 64 UMUL */
   const uint64_t c0 = results->accumulator[query->c_offset + 0];

   return c0 * 64;
}

/* Render Metrics Basic set :: GTI RCC Throughput
 *
 * Returns accumulator slot C3 multiplied by 64. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_rcc_throughput__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: C 3 READ 64 UMUL */
   const uint64_t c3 = results->accumulator[query->c_offset + 3];

   return c3 * 64;
}

/* Render Metrics Basic set :: GTI L3 Throughput
 *
 * Returns accumulator slot C4 multiplied by 64. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_l3_throughput__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ 64 UMUL */
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   return c4 * 64;
}

/* Render Metrics Basic set :: GTI Read Throughput
 *
 * Returns accumulator slot C6 multiplied by 128. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: C 6 READ 128 UMUL */
   const uint64_t c6 = results->accumulator[query->c_offset + 6];

   return c6 * 128;
}

/* Render Metrics Basic set :: GTI Write Throughput
 *
 * Returns accumulator slot C7 multiplied by 64. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__gti_write_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ 64 UMUL */
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   return c7 * 64;
}

/* Render Metrics Basic set :: LLC GPU Accesses
 *
 * Returns accumulator slot PERFCNT0 as-is. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__llc_accesses__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: PERFCNT 0 READ */
   return results->accumulator[query->perfcnt_offset + 0];
}

/* Render Metrics Basic set :: LLC GPU Hits
 *
 * Returns accumulator slot PERFCNT1 as-is. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__llc_hits__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: PERFCNT 1 READ */
   return results->accumulator[query->perfcnt_offset + 1];
}

/* Render Metrics Basic set :: LLC GPU Throughput
 *
 * Returns (PERFCNT0 - C7) * 64. The subtraction is a plain unsigned one
 * (USUB), with no clamping. `perf` is not consulted.
 */
static uint64_t
hsw__render_basic__llc_gpu_throughput__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: PERFCNT 0 READ C 7 READ USUB 64 UMUL */
   const uint64_t perfcnt0 = results->accumulator[query->perfcnt_offset + 0];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   return (perfcnt0 - c7) * 64;
}

/* Render Metrics Basic set :: FS Duration
 *
 * Evaluates
 *    ((A27 * A0) / S0 + (A28 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__ps_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a27 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a28 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: VS Duration
 *
 * Evaluates
 *    ((A2 * A0) / S0 + (A3 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__vs_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a2 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a3 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: GS Duration
 *
 * Evaluates
 *    ((A22 * A0) / S0 + (A23 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__gs_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a22 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a23 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: TES Duration
 *
 * Evaluates
 *    ((A12 * A0) / S0 + (A13 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__ds_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a12 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a13 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: TCS Duration
 *
 * Evaluates
 *    ((A7 * A0) / S0 + (A8 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__hs_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a7 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a8 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: CS Duration
 *
 * Evaluates
 *    ((A17 * A0) / S0 + (A18 * A1) / S1) * GpuTime
 *       / (GpuCoreClocks * n_eus * 1000)
 * with S0 = A2 + A7 + A12 + A17 + A22 + A27
 * and  S1 = A3 + A8 + A13 + A18 + A23 + A28.
 * Each of the three divisions yields 0 when its divisor is 0.
 */
static uint64_t
hsw__render_basic__cs_duration__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t a28 = results->accumulator[query->a_offset + 28];

   /* Divisors of the two ratio terms. */
   const uint64_t s0 = a2 + a7 + a12 + a17 + a22 + a27;
   const uint64_t s1 = a3 + a8 + a13 + a18 + a23 + a28;

   /* A zero divisor produces 0 instead of a division by zero. */
   const uint64_t ratio0 = s0 ? (a17 * a0) / s0 : 0;
   const uint64_t ratio1 = s1 ? (a18 * a1) / s1 : 0;

   const uint64_t scaled = (ratio0 + ratio1) *
      hsw__render_basic__gpu_time__read(perf, query, results);
   const uint64_t eu_clocks =
      hsw__render_basic__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t divisor = eu_clocks * 1000;

   return divisor ? scaled / divisor : 0;
}

/* Render Metrics Basic set :: Sampler Bottleneck
 *
 * Returns the larger of the Sampler0Bottleneck and Sampler1Bottleneck
 * metrics. The comparison is done on double values; the result is narrowed
 * to float on return.
 */
static float
hsw__render_basic__sampler_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: $Sampler0Bottleneck $Sampler1Bottleneck FMAX */
   const double sampler0 = hsw__render_basic__sampler0_bottleneck__read(perf, query, results);
   const double sampler1 = hsw__render_basic__sampler1_bottleneck__read(perf, query, results);

   /* Strict '>' so that sampler1 wins ties and unordered comparisons. */
   if (sampler0 > sampler1)
      return sampler0;

   return sampler1;
}

/* Render Metrics Basic set :: EU Idle
 *
 * Returns 100 minus the sum of the EuActive and EuStall metrics.
 */
static float
hsw__render_basic__eu_idle__read(UNUSED struct intel_perf_config *perf,
                                 const struct intel_perf_query_info *query,
                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: 100 $EuActive $EuStall FADD FSUB */
   /* The two readers are summed in a single expression before widening. */
   const double non_idle = hsw__render_basic__eu_active__read(perf, query, results) + hsw__render_basic__eu_stall__read(perf, query, results);

   return 100 - non_idle;
}

/* Compute Metrics Basic set aliases.
 *
 * None of the counters in this run has a function of its own: each
 * hsw__compute_basic__* name below is a preprocessor alias that expands to
 * the hsw__render_basic__* symbol with the same suffix.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define hsw__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define hsw__compute_basic__gpu_core_clocks__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define hsw__compute_basic__avg_gpu_core_frequency__read \
   hsw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (alias of the `__max` symbol, not the reader) */
#define hsw__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define hsw__compute_basic__gpu_busy__read \
   hsw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define hsw__compute_basic__vs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: TCS Threads Dispatched */
#define hsw__compute_basic__hs_threads__read \
   hsw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: TES Threads Dispatched */
#define hsw__compute_basic__ds_threads__read \
   hsw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define hsw__compute_basic__gs_threads__read \
   hsw__render_basic__gs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define hsw__compute_basic__ps_threads__read \
   hsw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define hsw__compute_basic__cs_threads__read \
   hsw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define hsw__compute_basic__eu_active__read \
   hsw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define hsw__compute_basic__eu_stall__read \
   hsw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: VS EU Active */
#define hsw__compute_basic__vs_eu_active__read \
   hsw__render_basic__vs_eu_active__read

/* Compute Metrics Basic set :: VS AVG Active per Thread */
#define hsw__compute_basic__vs_eu_active_per_thread__read \
   hsw__render_basic__vs_eu_active_per_thread__read

/* Compute Metrics Basic set :: VS EU Stall */
#define hsw__compute_basic__vs_eu_stall__read \
   hsw__render_basic__vs_eu_stall__read

/* Compute Metrics Basic set :: VS AVG Stall per Thread */
#define hsw__compute_basic__vs_eu_stall_per_thread__read \
   hsw__render_basic__vs_eu_stall_per_thread__read

/* Compute Metrics Basic set :: FS EU Active */
#define hsw__compute_basic__ps_eu_active__read \
   hsw__render_basic__ps_eu_active__read

/* Compute Metrics Basic set :: TCS EU Active */
#define hsw__compute_basic__hs_eu_active__read \
   hsw__render_basic__hs_eu_active__read

/* Compute Metrics Basic set :: TCS AVG Active per Thread */
#define hsw__compute_basic__hs_eu_active_per_thread__read \
   hsw__render_basic__hs_eu_active_per_thread__read

/* Compute Metrics Basic set :: TCS EU Stall */
#define hsw__compute_basic__hs_eu_stall__read \
   hsw__render_basic__hs_eu_stall__read

/* Compute Metrics Basic set :: TCS AVG Stall per Thread */
#define hsw__compute_basic__hs_eu_stall_per_thread__read \
   hsw__render_basic__hs_eu_stall_per_thread__read

/* Compute Metrics Basic set :: TES EU Active */
#define hsw__compute_basic__ds_eu_active__read \
   hsw__render_basic__ds_eu_active__read

/* Compute Metrics Basic set :: TES AVG Active per Thread */
#define hsw__compute_basic__ds_eu_active_per_thread__read \
   hsw__render_basic__ds_eu_active_per_thread__read

/* Compute Metrics Basic set :: TES EU Stall */
#define hsw__compute_basic__ds_eu_stall__read \
   hsw__render_basic__ds_eu_stall__read

/* Compute Metrics Basic set :: TES AVG Stall per Thread */
#define hsw__compute_basic__ds_eu_stall_per_thread__read \
   hsw__render_basic__ds_eu_stall_per_thread__read

/* Compute Metrics Basic set :: GS EU Active */
#define hsw__compute_basic__gs_eu_active__read \
   hsw__render_basic__gs_eu_active__read

/* Compute Metrics Basic set :: GS AVG Active per Thread */
#define hsw__compute_basic__gs_eu_active_per_thread__read \
   hsw__render_basic__gs_eu_active_per_thread__read

/* Compute Metrics Basic set :: GS EU Stall */
#define hsw__compute_basic__gs_eu_stall__read \
   hsw__render_basic__gs_eu_stall__read

/* Compute Metrics Basic set :: GS AVG Stall per Thread */
#define hsw__compute_basic__gs_eu_stall_per_thread__read \
   hsw__render_basic__gs_eu_stall_per_thread__read

/* Compute Metrics Basic set :: CS EU Active */
#define hsw__compute_basic__cs_eu_active__read \
   hsw__render_basic__cs_eu_active__read

/* Compute Metrics Basic set :: CS AVG Active per Thread */
#define hsw__compute_basic__cs_eu_active_per_thread__read \
   hsw__render_basic__cs_eu_active_per_thread__read

/* Compute Metrics Basic set :: CS EU Stall */
#define hsw__compute_basic__cs_eu_stall__read \
   hsw__render_basic__cs_eu_stall__read

/* Compute Metrics Basic set :: CS AVG Stall per Thread */
#define hsw__compute_basic__cs_eu_stall_per_thread__read \
   hsw__render_basic__cs_eu_stall_per_thread__read

/* Compute Metrics Basic set :: FS AVG Active per Thread */
#define hsw__compute_basic__ps_eu_active_per_thread__read \
   hsw__render_basic__ps_eu_active_per_thread__read

/* Compute Metrics Basic set :: FS EU Stall */
#define hsw__compute_basic__ps_eu_stall__read \
   hsw__render_basic__ps_eu_stall__read

/* Compute Metrics Basic set :: FS AVG Stall per Thread */
#define hsw__compute_basic__ps_eu_stall_per_thread__read \
   hsw__render_basic__ps_eu_stall_per_thread__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define hsw__compute_basic__hi_depth_test_fails__read \
   hsw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define hsw__compute_basic__early_depth_test_fails__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define hsw__compute_basic__samples_killed_in_ps__read \
   hsw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Alpha Test Fails */
#define hsw__compute_basic__alpha_test_fails__read \
   hsw__render_basic__alpha_test_fails__read

/* Compute Metrics Basic set :: Late Stencil Test Fails */
#define hsw__compute_basic__post_ps_stencil_test_fails__read \
   hsw__render_basic__post_ps_stencil_test_fails__read

/* Compute Metrics Basic set :: Late Depth Test Fails */
#define hsw__compute_basic__post_ps_depth_test_fails__read \
   hsw__render_basic__post_ps_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Written */
#define hsw__compute_basic__samples_written__read \
   hsw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Typed Bytes Read
 *
 * Returns (B4 + B5) * (n_eu_slices * 64), where n_eu_slices comes from
 * perf->sys_vars ($EuSlicesTotalCount).
 */
static uint64_t
hsw__compute_basic__typed_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ B 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   /* The scale factor is formed first, exactly as the RPN stack does. */
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (b4 + b5) * scale;
}

/* Compute Metrics Basic set :: Typed Bytes Written
 *
 * Returns (B6 + B7) * (n_eu_slices * 64), where n_eu_slices comes from
 * perf->sys_vars ($EuSlicesTotalCount).
 */
static uint64_t
hsw__compute_basic__typed_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 6 READ B 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   /* The scale factor is formed first, exactly as the RPN stack does. */
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (b6 + b7) * scale;
}

/* Compute Metrics Basic set :: Typed Atomics
 *
 * Returns (C0 + C1) * n_eu_slices, where n_eu_slices comes from
 * perf->sys_vars ($EuSlicesTotalCount).
 */
static uint64_t
hsw__compute_basic__typed_atomics__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: C 0 READ C 1 READ UADD $EuSlicesTotalCount UMUL */
   const uint64_t c0 = results->accumulator[query->c_offset + 0];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t both = c0 + c1;

   return both * perf->sys_vars.n_eu_slices;
}

/* Compute Metrics Basic set :: Untyped Bytes Read
 *
 * RPN equation: B 0 READ B 1 READ UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Adds the accumulated B0 and B1 counters and multiplies the sum by
 * (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
hsw__compute_basic__untyped_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (b0 + b1) * scale;
}

/* Compute Metrics Basic set :: Untyped Writes
 *
 * RPN equation: B 2 READ B 3 READ UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Adds the accumulated B2 and B3 counters and multiplies the sum by
 * (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
hsw__compute_basic__untyped_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (b2 + b3) * scale;
}

/* Compute Metrics Basic set :: SLM Bytes Read
 *
 * RPN equation: C 6 READ C 7 READ UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Adds the accumulated C6 and C7 counters and multiplies the sum by
 * (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
hsw__compute_basic__slm_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (c6 + c7) * scale;
}

/* Compute Metrics Basic set :: SLM Bytes Written
 *
 * RPN equation: C 4 READ C 5 READ UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Adds the accumulated C4 and C5 counters and multiplies the sum by
 * (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
hsw__compute_basic__slm_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;

   return (c4 + c5) * scale;
}

/* Compute Metrics Basic set :: LLC GPU Accesses
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__compute_basic__llc_accesses__read \
   hsw__render_basic__llc_accesses__read

/* Compute Metrics Basic set :: LLC GPU Hits
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__compute_basic__llc_hits__read \
   hsw__render_basic__llc_hits__read

/* Compute Metrics Extended set :: GPU Time Elapsed
 *
 * Alias: expands to hsw__render_basic__gpu_time__read.
 */
#define hsw__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks
 *
 * RPN equation: B 7 READ
 *
 * Returns the accumulated B7 counter as-is.
 */
static uint64_t
hsw__compute_extended__gpu_clocks__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 7];
}

/* Compute Metrics Extended set :: CS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__compute_extended__cs_threads__read \
   hsw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EuUntypedReads0
 *
 * RPN equation: B 0 READ
 *
 * Returns the accumulated B0 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_untyped_reads0__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 0];
}

/* Compute Metrics Extended set :: EuTypedReads0
 *
 * RPN equation: B 2 READ
 *
 * Returns the accumulated B2 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_typed_reads0__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 2];
}

/* Compute Metrics Extended set :: EuUntypedWrites0
 *
 * RPN equation: B 1 READ
 *
 * Returns the accumulated B1 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_untyped_writes0__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 1];
}

/* Compute Metrics Extended set :: EuTypedWrites0
 *
 * RPN equation: B 3 READ
 *
 * Returns the accumulated B3 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_typed_writes0__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 3];
}

/* Compute Metrics Extended set :: EuUntypedAtomics0
 *
 * RPN equation: B 4 READ
 *
 * Returns the accumulated B4 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_untyped_atomics0__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 4];
}

/* Compute Metrics Extended set :: EuTypedAtomics0
 *
 * RPN equation: B 5 READ
 *
 * Returns the accumulated B5 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_typed_atomics0__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 5];
}

/* Compute Metrics Extended set :: EuUrbAtomics0
 *
 * RPN equation: B 6 READ
 *
 * Returns the accumulated B6 counter as-is.
 */
static uint64_t
hsw__compute_extended__eu_urb_atomics0__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   return results->accumulator[query->b_offset + 6];
}

/* Compute Metrics Extended set :: Typed Reads 0
 *
 * Alias: expands to hsw__render_basic__gpu_core_clocks__read.
 * NOTE(review): the target has a different counter name; presumably it is
 * reused because its equation is identical to this counter's - confirm
 * against the generator (gen_perf.py) and the render_basic definition.
 */
#define hsw__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0
 *
 * RPN equation: C 0 READ
 *
 * Returns the accumulated C0 counter as-is.
 */
static uint64_t
hsw__compute_extended__typed_writes0__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 0];
}

/* Compute Metrics Extended set :: Untyped Reads 0
 *
 * RPN equation: C 3 READ
 *
 * Returns the accumulated C3 counter as-is.
 */
static uint64_t
hsw__compute_extended__untyped_reads0__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 3];
}

/* Compute Metrics Extended set :: Untyped Writes 0
 *
 * RPN equation: C 1 READ
 *
 * Returns the accumulated C1 counter as-is.
 */
static uint64_t
hsw__compute_extended__untyped_writes0__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 1];
}

/* Compute Metrics Extended set :: Typed Atomics 0
 *
 * RPN equation: C 4 READ
 *
 * Returns the accumulated C4 counter as-is.
 */
static uint64_t
hsw__compute_extended__typed_atomics0__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 4];
}

/* Compute Metrics Extended set :: TypedReadsPerCacheLine
 *
 * RPN equation: $EuTypedReads0 $TypedReads0 FDIV
 *
 * Floating-point ratio of the EuTypedReads0 reading to the TypedReads0
 * reading; evaluates to 0 when the divisor is zero.
 */
static float
hsw__compute_extended__typed_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                        const struct intel_perf_query_info *query,
                                                        const struct intel_perf_query_result *results)
{
   const double numerator = hsw__compute_extended__eu_typed_reads0__read(perf, query, results);
   const double denominator = hsw__compute_extended__typed_reads0__read(perf, query, results);
   double ratio = 0;

   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Metrics Extended set :: TypedWritesPerCacheLine
 *
 * RPN equation: $EuTypedWrites0 $TypedWrites0 FDIV
 *
 * Floating-point ratio of the EuTypedWrites0 reading to the TypedWrites0
 * reading; evaluates to 0 when the divisor is zero.
 */
static float
hsw__compute_extended__typed_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                         const struct intel_perf_query_info *query,
                                                         const struct intel_perf_query_result *results)
{
   const double numerator = hsw__compute_extended__eu_typed_writes0__read(perf, query, results);
   const double denominator = hsw__compute_extended__typed_writes0__read(perf, query, results);
   double ratio = 0;

   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine
 *
 * RPN equation: $EuUntypedReads0 $UntypedReads0 FDIV
 *
 * Floating-point ratio of the EuUntypedReads0 reading to the UntypedReads0
 * reading; evaluates to 0 when the divisor is zero.
 */
static float
hsw__compute_extended__untyped_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                          const struct intel_perf_query_info *query,
                                                          const struct intel_perf_query_result *results)
{
   const double numerator = hsw__compute_extended__eu_untyped_reads0__read(perf, query, results);
   const double denominator = hsw__compute_extended__untyped_reads0__read(perf, query, results);
   double ratio = 0;

   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine
 *
 * RPN equation: $EuUntypedWrites0 $UntypedWrites0 FDIV
 *
 * Floating-point ratio of the EuUntypedWrites0 reading to the UntypedWrites0
 * reading; evaluates to 0 when the divisor is zero.
 */
static float
hsw__compute_extended__untyped_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                           const struct intel_perf_query_info *query,
                                                           const struct intel_perf_query_result *results)
{
   const double numerator = hsw__compute_extended__eu_untyped_writes0__read(perf, query, results);
   const double denominator = hsw__compute_extended__untyped_writes0__read(perf, query, results);
   double ratio = 0;

   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine
 *
 * RPN equation: $EuTypedAtomics0 $TypedAtomics0 FDIV
 *
 * Floating-point ratio of the EuTypedAtomics0 reading to the TypedAtomics0
 * reading; evaluates to 0 when the divisor is zero.
 */
static float
hsw__compute_extended__typed_atomics_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                          const struct intel_perf_query_info *query,
                                                          const struct intel_perf_query_result *results)
{
   const double numerator = hsw__compute_extended__eu_typed_atomics0__read(perf, query, results);
   const double denominator = hsw__compute_extended__typed_atomics0__read(perf, query, results);
   double ratio = 0;

   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Metrics Extended set :: LLC GPU Accesses
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__compute_extended__llc_accesses__read \
   hsw__render_basic__llc_accesses__read

/* Compute Metrics Extended set :: LLC GPU Hits
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__compute_extended__llc_hits__read \
   hsw__render_basic__llc_hits__read

/* Memory Reads Distribution set :: GPU Time Elapsed
 *
 * Alias: expands to hsw__render_basic__gpu_time__read.
 */
#define hsw__memory_reads__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Reads Distribution set :: GPU Core Clocks
 *
 * RPN equation: C 7 READ
 *
 * Returns the accumulated C7 counter as-is.
 */
static uint64_t
hsw__memory_reads__gpu_core_clocks__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 7];
}

/* Memory Reads Distribution set :: AVG GPU Core Frequency
 *
 * RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV
 *
 * Multiplies the GPU core clocks reading by 1000000000 and integer-divides
 * the product by the GPU time reading; evaluates to 0 when the GPU time
 * reading is zero.
 */
static uint64_t
hsw__memory_reads__avg_gpu_core_frequency__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t scaled_clocks =
      hsw__memory_reads__gpu_core_clocks__read(perf, query, results) * 1000000000;
   const uint64_t gpu_time = hsw__memory_reads__gpu_time__read(perf, query, results);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Memory Reads Distribution set :: AVG GPU Core Frequency
 *
 * Alias for this counter's `__max` helper (as opposed to `__read`):
 * expands to hsw__render_basic__avg_gpu_core_frequency__max.
 */
#define hsw__memory_reads__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Reads Distribution set :: GPU Busy
 *
 * RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulated A41 counter by 100 (integer multiply) and
 * divides the product, in floating point, by the GPU core clocks reading;
 * evaluates to 0 when that reading is zero.
 */
static float
hsw__memory_reads__gpu_busy__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   const uint64_t a41 = results->accumulator[query->a_offset + 41];
   const double scaled = a41 * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: VS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__vs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Reads Distribution set :: TCS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__hs_threads__read \
   hsw__render_basic__hs_threads__read

/* Memory Reads Distribution set :: TES Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ds_threads__read \
   hsw__render_basic__ds_threads__read

/* Memory Reads Distribution set :: GS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__gs_threads__read \
   hsw__render_basic__gs_threads__read

/* Memory Reads Distribution set :: FS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ps_threads__read \
   hsw__render_basic__ps_threads__read

/* Memory Reads Distribution set :: CS Threads Dispatched
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__cs_threads__read \
   hsw__render_basic__cs_threads__read

/* Memory Reads Distribution set :: EU Active
 *
 * RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A0 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__eu_active__read(UNUSED struct intel_perf_config *perf,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_perf_query_result *results)
{
   const uint64_t a0 = results->accumulator[query->a_offset + 0];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a0 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: EU Stall
 *
 * RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A1 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__eu_stall__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   const uint64_t a1 = results->accumulator[query->a_offset + 1];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a1 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: VS EU Active
 *
 * RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A2 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__vs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a2 = results->accumulator[query->a_offset + 2];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a2 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: VS AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__vs_eu_active_per_thread__read \
   hsw__render_basic__vs_eu_active_per_thread__read

/* Memory Reads Distribution set :: VS EU Stall
 *
 * RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A3 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__vs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a3 = results->accumulator[query->a_offset + 3];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a3 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: VS AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__vs_eu_stall_per_thread__read \
   hsw__render_basic__vs_eu_stall_per_thread__read

/* Memory Reads Distribution set :: TCS EU Active
 *
 * RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A7 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__hs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a7 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: TCS AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__hs_eu_active_per_thread__read \
   hsw__render_basic__hs_eu_active_per_thread__read

/* Memory Reads Distribution set :: TCS EU Stall
 *
 * RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A8 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__hs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a8 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: TCS AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__hs_eu_stall_per_thread__read \
   hsw__render_basic__hs_eu_stall_per_thread__read

/* Memory Reads Distribution set :: TES EU Active
 *
 * RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A12 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__ds_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a12 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: TES AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ds_eu_active_per_thread__read \
   hsw__render_basic__ds_eu_active_per_thread__read

/* Memory Reads Distribution set :: TES EU Stall
 *
 * RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A13 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__ds_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a13 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: TES AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ds_eu_stall_per_thread__read \
   hsw__render_basic__ds_eu_stall_per_thread__read

/* Memory Reads Distribution set :: GS EU Active
 *
 * RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A22 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__gs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a22 = results->accumulator[query->a_offset + 22];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a22 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: GS AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__gs_eu_active_per_thread__read \
   hsw__render_basic__gs_eu_active_per_thread__read

/* Memory Reads Distribution set :: GS EU Stall
 *
 * RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A23 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__gs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a23 = results->accumulator[query->a_offset + 23];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a23 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: GS AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__gs_eu_stall_per_thread__read \
   hsw__render_basic__gs_eu_stall_per_thread__read

/* Memory Reads Distribution set :: CS EU Active
 *
 * RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A17 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__cs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a17 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: CS AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__cs_eu_active_per_thread__read \
   hsw__render_basic__cs_eu_active_per_thread__read

/* Memory Reads Distribution set :: CS EU Stall
 *
 * RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A18 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__cs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a18 = results->accumulator[query->a_offset + 18];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a18 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: CS AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__cs_eu_stall_per_thread__read \
   hsw__render_basic__cs_eu_stall_per_thread__read

/* Memory Reads Distribution set :: FS EU Active
 *
 * RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A27 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__ps_eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   const uint64_t a27 = results->accumulator[query->a_offset + 27];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a27 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: FS AVG Active per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ps_eu_active_per_thread__read \
   hsw__render_basic__ps_eu_active_per_thread__read

/* Memory Reads Distribution set :: FS EU Stall
 *
 * RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Integer-divides the accumulated A28 counter by perf->sys_vars.n_eus,
 * multiplies by 100, then divides in floating point by the GPU core clocks
 * reading.  Each division evaluates to 0 when its divisor is zero.
 */
static float
hsw__memory_reads__ps_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   const uint64_t a28 = results->accumulator[query->a_offset + 28];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu = (n_eus != 0) ? a28 / n_eus : 0;
   const double scaled = per_eu * 100;
   const double core_clocks = hsw__memory_reads__gpu_core_clocks__read(perf, query, results);
   double result = 0;

   if (core_clocks)
      result = scaled / core_clocks;

   return result;
}

/* Memory Reads Distribution set :: FS AVG Stall per Thread
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__ps_eu_stall_per_thread__read \
   hsw__render_basic__ps_eu_stall_per_thread__read

/* Memory Reads Distribution set :: Early Hi-Depth Test Fails
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__hi_depth_test_fails__read \
   hsw__render_basic__hi_depth_test_fails__read

/* Memory Reads Distribution set :: Early Depth Test Fails
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__early_depth_test_fails__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution set :: Samples Killed in FS
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__samples_killed_in_ps__read \
   hsw__render_basic__samples_killed_in_ps__read

/* Memory Reads Distribution set :: Alpha Test Fails
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__alpha_test_fails__read \
   hsw__render_basic__alpha_test_fails__read

/* Memory Reads Distribution set :: Late Stencil Test Fails
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__post_ps_stencil_test_fails__read \
   hsw__render_basic__post_ps_stencil_test_fails__read

/* Memory Reads Distribution set :: Late Depth Test Fails
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__post_ps_depth_test_fails__read \
   hsw__render_basic__post_ps_depth_test_fails__read

/* Memory Reads Distribution set :: Samples Written
 *
 * Alias: expands to the hsw__render_basic__ reader of the same counter.
 */
#define hsw__memory_reads__samples_written__read \
   hsw__render_basic__samples_written__read

/* Memory Reads Distribution set :: GtiCmdStreamerMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_untyped_reads0__read, which
 * returns the raw B0 accumulator value (B 0 READ).  The target's counter
 * name differs; presumably it is shared because the equation is the same
 * (confirm against the generator).
 */
#define hsw__memory_reads__gti_cmd_streamer_memory_reads__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Reads Distribution set :: GtiRsMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_typed_reads0__read, which
 * returns the raw B2 accumulator value (B 2 READ).
 */
#define hsw__memory_reads__gti_rs_memory_reads__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Reads Distribution set :: GtiVfMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_untyped_writes0__read, which
 * returns the raw B1 accumulator value (B 1 READ).
 */
#define hsw__memory_reads__gti_vf_memory_reads__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Memory Reads Distribution set :: GtiRccMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_typed_writes0__read, which
 * returns the raw B3 accumulator value (B 3 READ).
 */
#define hsw__memory_reads__gti_rcc_memory_reads__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Reads Distribution set :: GtiMscMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_untyped_atomics0__read, which
 * returns the raw B4 accumulator value (B 4 READ).
 */
#define hsw__memory_reads__gti_msc_memory_reads__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Reads Distribution set :: GtiHiDepthMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_typed_atomics0__read, which
 * returns the raw B5 accumulator value (B 5 READ).
 */
#define hsw__memory_reads__gti_hi_depth_memory_reads__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Reads Distribution set :: GtiStcMemoryReads
 *
 * Alias: expands to hsw__compute_extended__eu_urb_atomics0__read, which
 * returns the raw B6 accumulator value (B 6 READ).
 */
#define hsw__memory_reads__gti_stc_memory_reads__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Reads Distribution set :: GtiRczMemoryReads
 *
 * Alias: expands to hsw__compute_extended__gpu_clocks__read, which returns
 * the raw B7 accumulator value (B 7 READ).
 */
#define hsw__memory_reads__gti_rcz_memory_reads__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Reads Distribution set :: GtiL3Reads
 *
 * Alias: expands to hsw__compute_extended__typed_atomics0__read, which
 * returns the raw C4 accumulator value (C 4 READ).
 */
#define hsw__memory_reads__gti_l3_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Reads Distribution set :: GtiMemoryReads
 *
 * RPN equation: C 5 READ
 *
 * Returns the accumulated C5 counter as-is.
 */
static uint64_t
hsw__memory_reads__gti_memory_reads__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 5];
}

/* Memory Reads Distribution set :: LLC GPU Read Accesses
 *
 * RPN equation: C 6 READ
 *
 * Returns the accumulated C6 counter as-is.
 */
static uint64_t
hsw__memory_reads__llc_read_accesses__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   return results->accumulator[query->c_offset + 6];
}

/* Memory Reads Distribution set :: LLC GPU Accesses
 * (alias: resolves to the reader carrying the render_basic name) */
#define hsw__memory_reads__llc_accesses__read \
   hsw__render_basic__llc_accesses__read

/* Memory Reads Distribution set :: LLC GPU Hits
 * (alias: resolves to the reader carrying the render_basic name) */
#define hsw__memory_reads__llc_hits__read \
   hsw__render_basic__llc_hits__read

/* Memory Writes Distribution set: timing, clock and thread-dispatch metrics.
 * None of these has a reader of its own; each name is a preprocessor alias
 * for a reader carrying a render_basic or memory_reads name.
 */

/* Memory Writes Distribution set :: GPU Time Elapsed */
#define hsw__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution set :: GPU Core Clocks */
#define hsw__memory_writes__gpu_core_clocks__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution set :: AVG GPU Core Frequency (value reader) */
#define hsw__memory_writes__avg_gpu_core_frequency__read \
   hsw__memory_reads__avg_gpu_core_frequency__read

/* Memory Writes Distribution set :: AVG GPU Core Frequency (maximum, the
 * "__max" companion of the reader above) */
#define hsw__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution set :: GPU Busy */
#define hsw__memory_writes__gpu_busy__read \
   hsw__memory_reads__gpu_busy__read

/* Memory Writes Distribution set :: VS Threads Dispatched */
#define hsw__memory_writes__vs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution set :: TCS Threads Dispatched */
#define hsw__memory_writes__hs_threads__read \
   hsw__render_basic__hs_threads__read

/* Memory Writes Distribution set :: TES Threads Dispatched */
#define hsw__memory_writes__ds_threads__read \
   hsw__render_basic__ds_threads__read

/* Memory Writes Distribution set :: GS Threads Dispatched */
#define hsw__memory_writes__gs_threads__read \
   hsw__render_basic__gs_threads__read

/* Memory Writes Distribution set :: FS Threads Dispatched */
#define hsw__memory_writes__ps_threads__read \
   hsw__render_basic__ps_threads__read

/* Memory Writes Distribution set :: CS Threads Dispatched */
#define hsw__memory_writes__cs_threads__read \
   hsw__render_basic__cs_threads__read

/* Memory Writes Distribution set: EU active/stall metrics, overall and per
 * shader stage.  All of them are preprocessor aliases: the percentage
 * metrics resolve to memory_reads names and the "per Thread" averages
 * resolve to render_basic names.
 */

/* Memory Writes Distribution set :: EU Active */
#define hsw__memory_writes__eu_active__read \
   hsw__memory_reads__eu_active__read

/* Memory Writes Distribution set :: EU Stall */
#define hsw__memory_writes__eu_stall__read \
   hsw__memory_reads__eu_stall__read

/* Memory Writes Distribution set :: VS EU Active */
#define hsw__memory_writes__vs_eu_active__read \
   hsw__memory_reads__vs_eu_active__read

/* Memory Writes Distribution set :: VS AVG Active per Thread */
#define hsw__memory_writes__vs_eu_active_per_thread__read \
   hsw__render_basic__vs_eu_active_per_thread__read

/* Memory Writes Distribution set :: VS EU Stall */
#define hsw__memory_writes__vs_eu_stall__read \
   hsw__memory_reads__vs_eu_stall__read

/* Memory Writes Distribution set :: VS AVG Stall per Thread */
#define hsw__memory_writes__vs_eu_stall_per_thread__read \
   hsw__render_basic__vs_eu_stall_per_thread__read

/* Memory Writes Distribution set :: TCS EU Active */
#define hsw__memory_writes__hs_eu_active__read \
   hsw__memory_reads__hs_eu_active__read

/* Memory Writes Distribution set :: TCS AVG Active per Thread */
#define hsw__memory_writes__hs_eu_active_per_thread__read \
   hsw__render_basic__hs_eu_active_per_thread__read

/* Memory Writes Distribution set :: TCS EU Stall */
#define hsw__memory_writes__hs_eu_stall__read \
   hsw__memory_reads__hs_eu_stall__read

/* Memory Writes Distribution set :: TCS AVG Stall per Thread */
#define hsw__memory_writes__hs_eu_stall_per_thread__read \
   hsw__render_basic__hs_eu_stall_per_thread__read

/* Memory Writes Distribution set :: TES EU Active */
#define hsw__memory_writes__ds_eu_active__read \
   hsw__memory_reads__ds_eu_active__read

/* Memory Writes Distribution set :: TES AVG Active per Thread */
#define hsw__memory_writes__ds_eu_active_per_thread__read \
   hsw__render_basic__ds_eu_active_per_thread__read

/* Memory Writes Distribution set :: TES EU Stall */
#define hsw__memory_writes__ds_eu_stall__read \
   hsw__memory_reads__ds_eu_stall__read

/* Memory Writes Distribution set :: TES AVG Stall per Thread */
#define hsw__memory_writes__ds_eu_stall_per_thread__read \
   hsw__render_basic__ds_eu_stall_per_thread__read

/* Memory Writes Distribution set :: GS EU Active */
#define hsw__memory_writes__gs_eu_active__read \
   hsw__memory_reads__gs_eu_active__read

/* Memory Writes Distribution set :: GS AVG Active per Thread */
#define hsw__memory_writes__gs_eu_active_per_thread__read \
   hsw__render_basic__gs_eu_active_per_thread__read

/* Memory Writes Distribution set :: GS EU Stall */
#define hsw__memory_writes__gs_eu_stall__read \
   hsw__memory_reads__gs_eu_stall__read

/* Memory Writes Distribution set :: GS AVG Stall per Thread */
#define hsw__memory_writes__gs_eu_stall_per_thread__read \
   hsw__render_basic__gs_eu_stall_per_thread__read

/* Memory Writes Distribution set :: CS EU Active */
#define hsw__memory_writes__cs_eu_active__read \
   hsw__memory_reads__cs_eu_active__read

/* Memory Writes Distribution set :: CS AVG Active per Thread */
#define hsw__memory_writes__cs_eu_active_per_thread__read \
   hsw__render_basic__cs_eu_active_per_thread__read

/* Memory Writes Distribution set :: CS EU Stall */
#define hsw__memory_writes__cs_eu_stall__read \
   hsw__memory_reads__cs_eu_stall__read

/* Memory Writes Distribution set :: CS AVG Stall per Thread */
#define hsw__memory_writes__cs_eu_stall_per_thread__read \
   hsw__render_basic__cs_eu_stall_per_thread__read

/* Memory Writes Distribution set :: FS EU Active */
#define hsw__memory_writes__ps_eu_active__read \
   hsw__memory_reads__ps_eu_active__read

/* Memory Writes Distribution set :: FS AVG Active per Thread */
#define hsw__memory_writes__ps_eu_active_per_thread__read \
   hsw__render_basic__ps_eu_active_per_thread__read

/* Memory Writes Distribution set :: FS EU Stall */
#define hsw__memory_writes__ps_eu_stall__read \
   hsw__memory_reads__ps_eu_stall__read

/* Memory Writes Distribution set :: FS AVG Stall per Thread */
#define hsw__memory_writes__ps_eu_stall_per_thread__read \
   hsw__render_basic__ps_eu_stall_per_thread__read

/* Memory Writes Distribution set: depth/stencil/alpha test and sample
 * metrics.  Each name is a preprocessor alias for the render_basic reader
 * with the same suffix.
 */

/* Memory Writes Distribution set :: Early Hi-Depth Test Fails */
#define hsw__memory_writes__hi_depth_test_fails__read \
   hsw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution set :: Early Depth Test Fails */
#define hsw__memory_writes__early_depth_test_fails__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution set :: Samples Killed in FS */
#define hsw__memory_writes__samples_killed_in_ps__read \
   hsw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution set :: Alpha Test Fails */
#define hsw__memory_writes__alpha_test_fails__read \
   hsw__render_basic__alpha_test_fails__read

/* Memory Writes Distribution set :: Late Stencil Test Fails */
#define hsw__memory_writes__post_ps_stencil_test_fails__read \
   hsw__render_basic__post_ps_stencil_test_fails__read

/* Memory Writes Distribution set :: Late Depth Test Fails */
#define hsw__memory_writes__post_ps_depth_test_fails__read \
   hsw__render_basic__post_ps_depth_test_fails__read

/* Memory Writes Distribution set :: Samples Written */
#define hsw__memory_writes__samples_written__read \
   hsw__render_basic__samples_written__read

/* Memory Writes Distribution set: GTI write counters.  Every name below is
 * a preprocessor alias; most resolve to readers carrying compute_extended
 * names, so the alias target's name describes a compute_extended counter
 * rather than the GTI traffic reported by this set.
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* Memory Writes Distribution set :: GtiCmdStreamerMemoryWrites */
#define hsw__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution set :: GtiSoMemoryWrites */
#define hsw__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution set :: GtiRccMemoryWrites */
#define hsw__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution set :: GtiMscMemoryWrites */
#define hsw__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution set :: GtiHizMemoryWrites */
#define hsw__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution set :: GtiStcMemoryWrites */
#define hsw__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution set :: GtiRczMemoryWrites */
#define hsw__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution set :: GtiL3Writes */
#define hsw__memory_writes__gti_l3_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution set :: GtiMemoryWrites
 * (alias of the Memory Reads GtiMemoryReads reader, which returns the
 * accumulator entry at c_offset + 5) */
#define hsw__memory_writes__gti_memory_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution set :: LLC GPU Write Accesses
 *
 * Returns twice the accumulated value stored at index 6 of the C-counter
 * range (query->c_offset + 6).
 */
static uint64_t
hsw__memory_writes__llc_wr_accesses__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: C 6 READ 2 UMUL */
   const uint64_t c6 = results->accumulator[query->c_offset + 6];

   return c6 * 2;
}

/* Memory Writes Distribution set :: LLC GPU Accesses
 * (alias: resolves to the reader carrying the render_basic name) */
#define hsw__memory_writes__llc_accesses__read \
   hsw__render_basic__llc_accesses__read

/* Memory Writes Distribution set :: LLC GPU Hits
 * (alias: resolves to the reader carrying the render_basic name) */
#define hsw__memory_writes__llc_hits__read \
   hsw__render_basic__llc_hits__read

/* Memory Writes Distribution set :: LLC GPU Throughput
 *
 * Combines perfcnt counter 0 with the LLC write-access metric: the part of
 * perfcnt 0 that is not write accesses is weighted by 64 and the write
 * accesses by 32.  All arithmetic is unsigned 64-bit.
 */
static uint64_t
hsw__memory_writes__llc_gpu_throughput__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: PERFCNT 0 READ $LlcWrAccesses USUB 64 UMUL $LlcWrAccesses 32 UMUL UADD */
   const uint64_t perfcnt0 = results->accumulator[query->perfcnt_offset + 0];
   /* The write-access reader only reads the accumulator, so one call serves
    * both uses in the equation.
    */
   const uint64_t wr_accesses =
      hsw__memory_writes__llc_wr_accesses__read(perf, query, results);

   return (perfcnt0 - wr_accesses) * 64 + wr_accesses * 32;
}

/* Metric set SamplerBalance :: GPU Time Elapsed
 * (alias of the Render Metrics Basic reader) */
#define hsw__sampler_balance__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set SamplerBalance :: GPU Core Clocks
 * (alias: resolves to the reader carrying the compute_extended name) */
#define hsw__sampler_balance__gpu_core_clocks__read \
   hsw__compute_extended__gpu_clocks__read

/* Metric set SamplerBalance :: AVG GPU Core Frequency
 *
 * Returns GpuCoreClocks * 1000000000 / GpuTime using unsigned integer
 * division, or 0 when GpuTime is 0.
 * NOTE(review): the scaled clock count is held in a uint64_t -- confirm that
 * wrap-around is acceptable for very long captures.
 */
static uint64_t
hsw__sampler_balance__avg_gpu_core_frequency__read(UNUSED struct intel_perf_config *perf,
                                                   const struct intel_perf_query_info *query,
                                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) * 1000000000;
   const uint64_t gpu_time =
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Metric set SamplerBalance :: AVG GPU Core Frequency (maximum, the "__max"
 * companion of the "__read" reader; alias of the render_basic name) */
#define hsw__sampler_balance__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set SamplerBalance :: GPU Busy
 *
 * Returns (A41 * 100) / GpuCoreClocks.  The multiplication is done in
 * unsigned 64-bit integers, the division in double precision, and the
 * result is narrowed to float.  Yields 0 when GpuCoreClocks is 0.
 */
static float
hsw__sampler_balance__gpu_busy__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 41 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t busy_x100 = results->accumulator[query->a_offset + 41] * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)busy_x100 / core_clocks;

   return percent;
}

/* SamplerBalance thread-dispatch metrics.  Each name is a preprocessor alias
 * for the render_basic reader with the same suffix.
 */

/* Metric set SamplerBalance :: VS Threads Dispatched */
#define hsw__sampler_balance__vs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set SamplerBalance :: TCS Threads Dispatched */
#define hsw__sampler_balance__hs_threads__read \
   hsw__render_basic__hs_threads__read

/* Metric set SamplerBalance :: TES Threads Dispatched */
#define hsw__sampler_balance__ds_threads__read \
   hsw__render_basic__ds_threads__read

/* Metric set SamplerBalance :: GS Threads Dispatched */
#define hsw__sampler_balance__gs_threads__read \
   hsw__render_basic__gs_threads__read

/* Metric set SamplerBalance :: FS Threads Dispatched */
#define hsw__sampler_balance__ps_threads__read \
   hsw__render_basic__ps_threads__read

/* Metric set SamplerBalance :: CS Threads Dispatched */
#define hsw__sampler_balance__cs_threads__read \
   hsw__render_basic__cs_threads__read

/* Metric set SamplerBalance :: EU Active
 *
 * Returns ((A0 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 0 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 0];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: EU Stall
 *
 * Returns ((A1 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 1 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 1];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: VS EU Active
 *
 * Returns ((A2 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__vs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 2];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: VS AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__vs_eu_active_per_thread__read \
   hsw__render_basic__vs_eu_active_per_thread__read

/* Metric set SamplerBalance :: VS EU Stall
 *
 * Returns ((A3 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__vs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 3 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 3];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: VS AVG Stall per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__vs_eu_stall_per_thread__read \
   hsw__render_basic__vs_eu_stall_per_thread__read

/* Metric set SamplerBalance :: TCS EU Active
 *
 * Returns ((A7 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__hs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 7];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: TCS AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__hs_eu_active_per_thread__read \
   hsw__render_basic__hs_eu_active_per_thread__read

/* Metric set SamplerBalance :: TCS EU Stall
 *
 * Returns ((A8 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__hs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 8];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: TCS AVG Stall per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__hs_eu_stall_per_thread__read \
   hsw__render_basic__hs_eu_stall_per_thread__read

/* Metric set SamplerBalance :: TES EU Active
 *
 * Returns ((A12 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__ds_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 12];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: TES AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__ds_eu_active_per_thread__read \
   hsw__render_basic__ds_eu_active_per_thread__read

/* Metric set SamplerBalance :: TES EU Stall
 *
 * Returns ((A13 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__ds_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 13];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: TES AVG Stall per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__ds_eu_stall_per_thread__read \
   hsw__render_basic__ds_eu_stall_per_thread__read

/* Metric set SamplerBalance :: GS EU Active
 *
 * Returns ((A22 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__gs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 22 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 22];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: GS AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__gs_eu_active_per_thread__read \
   hsw__render_basic__gs_eu_active_per_thread__read

/* Metric set SamplerBalance :: GS EU Stall
 *
 * Returns ((A23 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__gs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 23 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 23];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: GS AVG Stall per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__gs_eu_stall_per_thread__read \
   hsw__render_basic__gs_eu_stall_per_thread__read

/* Metric set SamplerBalance :: CS EU Active
 *
 * Returns ((A17 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__cs_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 17];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: CS AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__cs_eu_active_per_thread__read \
   hsw__render_basic__cs_eu_active_per_thread__read

/* Metric set SamplerBalance :: CS EU Stall
 *
 * Returns ((A18 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__cs_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 18];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: CS AVG Stall per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__cs_eu_stall_per_thread__read \
   hsw__render_basic__cs_eu_stall_per_thread__read

/* Metric set SamplerBalance :: FS EU Active
 *
 * Returns ((A27 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__ps_eu_active__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 27];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* Metric set SamplerBalance :: FS AVG Active per Thread
 * (alias: resolves to the render_basic reader with the same suffix) */
#define hsw__sampler_balance__ps_eu_active_per_thread__read \
   hsw__render_basic__ps_eu_active_per_thread__read

/* Metric set SamplerBalance :: FS EU Stall
 *
 * Returns ((A28 / n_eus) * 100) / GpuCoreClocks.  The per-EU division and
 * the scaling by 100 use unsigned 64-bit integers; the final division is
 * done in double precision and narrowed to float.  A zero n_eus or a zero
 * GpuCoreClocks yields 0.
 */
static float
hsw__sampler_balance__ps_eu_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 28 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 28];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t per_eu_x100 = (n_eus ? raw / n_eus : 0) * 100;
   const double core_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks)
      percent = (double)per_eu_x100 / core_clocks;

   return percent;
}

/* SamplerBalance metrics that have no reader of their own.  Each name below
 * is a preprocessor alias for the render_basic reader with the same suffix.
 */

/* Metric set SamplerBalance :: FS AVG Stall per Thread */
#define hsw__sampler_balance__ps_eu_stall_per_thread__read \
   hsw__render_basic__ps_eu_stall_per_thread__read

/* Metric set SamplerBalance :: Early Hi-Depth Test Fails */
#define hsw__sampler_balance__hi_depth_test_fails__read \
   hsw__render_basic__hi_depth_test_fails__read

/* Metric set SamplerBalance :: Early Depth Test Fails */
#define hsw__sampler_balance__early_depth_test_fails__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set SamplerBalance :: Samples Killed in FS */
#define hsw__sampler_balance__samples_killed_in_ps__read \
   hsw__render_basic__samples_killed_in_ps__read

/* Metric set SamplerBalance :: Alpha Test Fails */
#define hsw__sampler_balance__alpha_test_fails__read \
   hsw__render_basic__alpha_test_fails__read

/* Metric set SamplerBalance :: Late Stencil Test Fails */
#define hsw__sampler_balance__post_ps_stencil_test_fails__read \
   hsw__render_basic__post_ps_stencil_test_fails__read

/* Metric set SamplerBalance :: Late Depth Test Fails */
#define hsw__sampler_balance__post_ps_depth_test_fails__read \
   hsw__render_basic__post_ps_depth_test_fails__read

/* Metric set SamplerBalance :: Samples Written */
#define hsw__sampler_balance__samples_written__read \
   hsw__render_basic__samples_written__read

/* Metric set SamplerBalance :: FS Duration
 *
 * Evaluates, entirely in unsigned 64-bit integer arithmetic:
 *
 *    share = (A27 * A0) / (A2 + A7 + A12 + A17 + A22 + A27)
 *          + (A28 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (share * GpuTime) / (GpuCoreClocks * n_eus * 1000)
 *
 * Every division yields 0 when its divisor is 0.  The A-counter indices are
 * the same ones read by this set's EU Active / EU Stall metrics (0/1
 * overall, 2/3 VS, 7/8 TCS, 12/13 TES, 17/18 CS, 22/23 GS, 27/28 FS).
 * NOTE(review): the counter products are held in a uint64_t -- confirm that
 * wrap-around is acceptable for long captures.
 */
static uint64_t
hsw__sampler_balance__ps_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 28 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t eu_active = results->accumulator[query->a_offset + 0];
   const uint64_t eu_stall = results->accumulator[query->a_offset + 1];
   const uint64_t vs_active = results->accumulator[query->a_offset + 2];
   const uint64_t vs_stall = results->accumulator[query->a_offset + 3];
   const uint64_t hs_active = results->accumulator[query->a_offset + 7];
   const uint64_t hs_stall = results->accumulator[query->a_offset + 8];
   const uint64_t ds_active = results->accumulator[query->a_offset + 12];
   const uint64_t ds_stall = results->accumulator[query->a_offset + 13];
   const uint64_t cs_active = results->accumulator[query->a_offset + 17];
   const uint64_t cs_stall = results->accumulator[query->a_offset + 18];
   const uint64_t gs_active = results->accumulator[query->a_offset + 22];
   const uint64_t gs_stall = results->accumulator[query->a_offset + 23];
   const uint64_t ps_active = results->accumulator[query->a_offset + 27];
   const uint64_t ps_stall = results->accumulator[query->a_offset + 28];

   /* Sums over all shader stages, used as the divisors of the two shares. */
   const uint64_t active_total =
      vs_active + hs_active + ds_active + cs_active + gs_active + ps_active;
   const uint64_t stall_total =
      vs_stall + hs_stall + ds_stall + cs_stall + gs_stall + ps_stall;

   const uint64_t active_share =
      active_total ? (ps_active * eu_active) / active_total : 0;
   const uint64_t stall_share =
      stall_total ? (ps_stall * eu_stall) / stall_total : 0;

   const uint64_t numerator =
      (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) *
      perf->sys_vars.n_eus;
   denominator *= 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Metric set SamplerBalance :: VS Duration
 *
 * Evaluates, entirely in unsigned 64-bit integer arithmetic:
 *
 *    share = (A2 * A0) / (A2 + A7 + A12 + A17 + A22 + A27)
 *          + (A3 * A1) / (A3 + A8 + A13 + A18 + A23 + A28)
 *    result = (share * GpuTime) / (GpuCoreClocks * n_eus * 1000)
 *
 * Every division yields 0 when its divisor is 0.  The A-counter indices are
 * the same ones read by this set's EU Active / EU Stall metrics (0/1
 * overall, 2/3 VS, 7/8 TCS, 12/13 TES, 17/18 CS, 22/23 GS, 27/28 FS).
 * NOTE(review): the counter products are held in a uint64_t -- confirm that
 * wrap-around is acceptable for long captures.
 */
static uint64_t
hsw__sampler_balance__vs_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 3 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */
   const uint64_t eu_active = results->accumulator[query->a_offset + 0];
   const uint64_t eu_stall = results->accumulator[query->a_offset + 1];
   const uint64_t vs_active = results->accumulator[query->a_offset + 2];
   const uint64_t vs_stall = results->accumulator[query->a_offset + 3];
   const uint64_t hs_active = results->accumulator[query->a_offset + 7];
   const uint64_t hs_stall = results->accumulator[query->a_offset + 8];
   const uint64_t ds_active = results->accumulator[query->a_offset + 12];
   const uint64_t ds_stall = results->accumulator[query->a_offset + 13];
   const uint64_t cs_active = results->accumulator[query->a_offset + 17];
   const uint64_t cs_stall = results->accumulator[query->a_offset + 18];
   const uint64_t gs_active = results->accumulator[query->a_offset + 22];
   const uint64_t gs_stall = results->accumulator[query->a_offset + 23];
   const uint64_t ps_active = results->accumulator[query->a_offset + 27];
   const uint64_t ps_stall = results->accumulator[query->a_offset + 28];

   /* Sums over all shader stages, used as the divisors of the two shares. */
   const uint64_t active_total =
      vs_active + hs_active + ds_active + cs_active + gs_active + ps_active;
   const uint64_t stall_total =
      vs_stall + hs_stall + ds_stall + cs_stall + gs_stall + ps_stall;

   const uint64_t active_share =
      active_total ? (vs_active * eu_active) / active_total : 0;
   const uint64_t stall_share =
      stall_total ? (vs_stall * eu_stall) / stall_total : 0;

   const uint64_t numerator =
      (active_share + stall_share) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   uint64_t denominator =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) *
      perf->sys_vars.n_eus;
   denominator *= 1000;

   if (denominator == 0)
      return 0;

   return numerator / denominator;
}

/* Metric set SamplerBalance :: GS Duration
 *
 * Evaluates the RPN equation quoted in the body: two ratios built from A
 * counters are added together, multiplied by $GpuTime and divided by
 * $GpuCoreClocks * $EuCoresTotalCount * 1000.  Each division yields 0 when
 * its divisor is 0.
 */
static uint64_t
hsw__sampler_balance__gs_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 22 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 23 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* Divisors of the two ratios: A2 + A7 + ... + A27 and A3 + A8 + ... + A28,
    * i.e. six counters each, spaced five slots apart.
    */
   uint64_t first_divisor = 0;
   uint64_t second_divisor = 0;
   for (int slot = 2; slot <= 27; slot += 5) {
      const uint64_t first_sample = results->accumulator[query->a_offset + slot];
      const uint64_t second_sample = results->accumulator[query->a_offset + slot + 1];

      first_divisor += first_sample;
      second_divisor += second_sample;
   }

   /* First ratio: A22 * A0 over the first divisor. */
   const uint64_t first_count = results->accumulator[query->a_offset + 22];
   const uint64_t first_weight = results->accumulator[query->a_offset + 0];
   const uint64_t first_ratio =
      first_divisor ? (first_count * first_weight) / first_divisor : 0;

   /* Second ratio: A23 * A1 over the second divisor. */
   const uint64_t second_count = results->accumulator[query->a_offset + 23];
   const uint64_t second_weight = results->accumulator[query->a_offset + 1];
   const uint64_t second_ratio =
      second_divisor ? (second_count * second_weight) / second_divisor : 0;

   /* $GpuTime UMUL */
   const uint64_t scaled = (first_ratio + second_ratio) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   /* $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL */
   const uint64_t eu_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t final_divisor = eu_clocks * 1000;

   if (final_divisor == 0)
      return 0;

   return scaled / final_divisor;
}

/* Metric set SamplerBalance :: TES Duration
 *
 * Evaluates the RPN equation quoted in the body: two ratios built from A
 * counters are added together, multiplied by $GpuTime and divided by
 * $GpuCoreClocks * $EuCoresTotalCount * 1000.  Each division yields 0 when
 * its divisor is 0.
 */
static uint64_t
hsw__sampler_balance__ds_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 13 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* Divisors of the two ratios: A2 + A7 + ... + A27 and A3 + A8 + ... + A28,
    * i.e. six counters each, spaced five slots apart.
    */
   uint64_t first_divisor = 0;
   uint64_t second_divisor = 0;
   for (int slot = 2; slot <= 27; slot += 5) {
      const uint64_t first_sample = results->accumulator[query->a_offset + slot];
      const uint64_t second_sample = results->accumulator[query->a_offset + slot + 1];

      first_divisor += first_sample;
      second_divisor += second_sample;
   }

   /* First ratio: A12 * A0 over the first divisor. */
   const uint64_t first_count = results->accumulator[query->a_offset + 12];
   const uint64_t first_weight = results->accumulator[query->a_offset + 0];
   const uint64_t first_ratio =
      first_divisor ? (first_count * first_weight) / first_divisor : 0;

   /* Second ratio: A13 * A1 over the second divisor. */
   const uint64_t second_count = results->accumulator[query->a_offset + 13];
   const uint64_t second_weight = results->accumulator[query->a_offset + 1];
   const uint64_t second_ratio =
      second_divisor ? (second_count * second_weight) / second_divisor : 0;

   /* $GpuTime UMUL */
   const uint64_t scaled = (first_ratio + second_ratio) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   /* $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL */
   const uint64_t eu_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t final_divisor = eu_clocks * 1000;

   if (final_divisor == 0)
      return 0;

   return scaled / final_divisor;
}

/* Metric set SamplerBalance :: TCS Duration
 *
 * Evaluates the RPN equation quoted in the body: two ratios built from A
 * counters are added together, multiplied by $GpuTime and divided by
 * $GpuCoreClocks * $EuCoresTotalCount * 1000.  Each division yields 0 when
 * its divisor is 0.
 */
static uint64_t
hsw__sampler_balance__hs_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 8 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* Divisors of the two ratios: A2 + A7 + ... + A27 and A3 + A8 + ... + A28,
    * i.e. six counters each, spaced five slots apart.
    */
   uint64_t first_divisor = 0;
   uint64_t second_divisor = 0;
   for (int slot = 2; slot <= 27; slot += 5) {
      const uint64_t first_sample = results->accumulator[query->a_offset + slot];
      const uint64_t second_sample = results->accumulator[query->a_offset + slot + 1];

      first_divisor += first_sample;
      second_divisor += second_sample;
   }

   /* First ratio: A7 * A0 over the first divisor. */
   const uint64_t first_count = results->accumulator[query->a_offset + 7];
   const uint64_t first_weight = results->accumulator[query->a_offset + 0];
   const uint64_t first_ratio =
      first_divisor ? (first_count * first_weight) / first_divisor : 0;

   /* Second ratio: A8 * A1 over the second divisor. */
   const uint64_t second_count = results->accumulator[query->a_offset + 8];
   const uint64_t second_weight = results->accumulator[query->a_offset + 1];
   const uint64_t second_ratio =
      second_divisor ? (second_count * second_weight) / second_divisor : 0;

   /* $GpuTime UMUL */
   const uint64_t scaled = (first_ratio + second_ratio) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   /* $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL */
   const uint64_t eu_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t final_divisor = eu_clocks * 1000;

   if (final_divisor == 0)
      return 0;

   return scaled / final_divisor;
}

/* Metric set SamplerBalance :: CS Duration
 *
 * Evaluates the RPN equation quoted in the body: two ratios built from A
 * counters are added together, multiplied by $GpuTime and divided by
 * $GpuCoreClocks * $EuCoresTotalCount * 1000.  Each division yields 0 when
 * its divisor is 0.
 */
static uint64_t
hsw__sampler_balance__cs_duration__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ  A 0 READ UMUL  A 2 READ  A 7 READ UADD  A 12 READ UADD  A 17 READ UADD  A 22 READ UADD  A 27 READ UADD UDIV  A 18 READ  A 1 READ UMUL  A 3 READ  A 8 READ UADD  A 13 READ UADD  A 18 READ UADD  A 23 READ UADD  A 28 READ UADD UDIV UADD $GpuTime UMUL $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL UDIV */

   /* Divisors of the two ratios: A2 + A7 + ... + A27 and A3 + A8 + ... + A28,
    * i.e. six counters each, spaced five slots apart.
    */
   uint64_t first_divisor = 0;
   uint64_t second_divisor = 0;
   for (int slot = 2; slot <= 27; slot += 5) {
      const uint64_t first_sample = results->accumulator[query->a_offset + slot];
      const uint64_t second_sample = results->accumulator[query->a_offset + slot + 1];

      first_divisor += first_sample;
      second_divisor += second_sample;
   }

   /* First ratio: A17 * A0 over the first divisor. */
   const uint64_t first_count = results->accumulator[query->a_offset + 17];
   const uint64_t first_weight = results->accumulator[query->a_offset + 0];
   const uint64_t first_ratio =
      first_divisor ? (first_count * first_weight) / first_divisor : 0;

   /* Second ratio: A18 * A1 over the second divisor. */
   const uint64_t second_count = results->accumulator[query->a_offset + 18];
   const uint64_t second_weight = results->accumulator[query->a_offset + 1];
   const uint64_t second_ratio =
      second_divisor ? (second_count * second_weight) / second_divisor : 0;

   /* $GpuTime UMUL */
   const uint64_t scaled = (first_ratio + second_ratio) *
      hsw__sampler_balance__gpu_time__read(perf, query, results);

   /* $GpuCoreClocks $EuCoresTotalCount UMUL 1000 UMUL */
   const uint64_t eu_clocks =
      hsw__sampler_balance__gpu_core_clocks__read(perf, query, results) * perf->sys_vars.n_eus;
   const uint64_t final_divisor = eu_clocks * 1000;

   if (final_divisor == 0)
      return 0;

   return scaled / final_divisor;
}

/* Metric set SamplerBalance :: EU Idle
 *
 * Returns 100 minus the sum of the $EuActive and $EuStall readers of this
 * metric set.
 */
static float
hsw__sampler_balance__eu_idle__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: 100 $EuActive $EuStall FADD FSUB */

   /* The sum is kept as a single expression so it is evaluated in whatever
    * type the two readers return before being widened to double.
    */
   const double occupied =
      hsw__sampler_balance__eu_active__read(perf, query, results) +
      hsw__sampler_balance__eu_stall__read(perf, query, results);
   const double idle = 100 - occupied;

   return idle;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses
 *
 * Returns the sum of counters C7 down to C0.
 */
static uint64_t
hsw__sampler_balance__sampler_l2_cache_misses__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD C 3 READ UADD C 2 READ UADD C 1 READ UADD C 0 READ UADD */
   uint64_t total = 0;

   /* Walk the counters in the same descending order as the equation. */
   for (int slot = 7; slot >= 0; slot--) {
      const uint64_t sample = results->accumulator[query->c_offset + slot];

      total += sample;
   }

   return total;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss0)
 *
 * Returns the sum of counters C7 and C6.
 */
static uint64_t
hsw__sampler_balance__sampler0_l2_cache_misses__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ C 6 READ UADD */
   const uint64_t upper = results->accumulator[query->c_offset + 7];
   const uint64_t lower = results->accumulator[query->c_offset + 6];

   return upper + lower;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss1)
 *
 * Returns the sum of counters C5 and C4.
 */
static uint64_t
hsw__sampler_balance__sampler1_l2_cache_misses__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ C 4 READ UADD */
   const uint64_t upper = results->accumulator[query->c_offset + 5];
   const uint64_t lower = results->accumulator[query->c_offset + 4];

   return upper + lower;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss2)
 *
 * Returns the sum of counters C3 and C2.
 */
static uint64_t
hsw__sampler_balance__sampler2_l2_cache_misses__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: C 3 READ C 2 READ UADD */
   const uint64_t upper = results->accumulator[query->c_offset + 3];
   const uint64_t lower = results->accumulator[query->c_offset + 2];

   return upper + lower;
}

/* Metric set SamplerBalance :: Sampler L2 cache misses (ss3)
 *
 * Returns the sum of counters C1 and C0.
 */
static uint64_t
hsw__sampler_balance__sampler3_l2_cache_misses__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: C 1 READ C 0 READ UADD */
   const uint64_t upper = results->accumulator[query->c_offset + 1];
   const uint64_t lower = results->accumulator[query->c_offset + 0];

   return upper + lower;
}

/* Metric set SamplerBalance :: LLC GPU Accesses
 *
 * No dedicated reader is emitted for this metric; the name is an alias for
 * the Render Metrics Basic reader named below.
 */
#define hsw__sampler_balance__llc_accesses__read \
   hsw__render_basic__llc_accesses__read

/* Metric set SamplerBalance :: LLC GPU Hits
 *
 * No dedicated reader is emitted for this metric; the name is an alias for
 * the Render Metrics Basic reader named below.
 */
#define hsw__sampler_balance__llc_hits__read \
   hsw__render_basic__llc_hits__read

/* Render Metrics Basic set :: GPU Time Elapsed
 *
 * The bdw name is an alias for the hsw reader of the same metric, so both
 * prefixes resolve to one function.
 */
#define bdw__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks
 *
 * Returns the accumulated value stored at the query's GPU clock offset.
 */
static uint64_t
bdw__render_basic__gpu_core_clocks__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ */
   return results->accumulator[query->gpu_clock_offset + 0];
}

/* Render Metrics Basic set :: AVG GPU Core Frequency
 *
 * $GpuCoreClocks multiplied by 1000000000 and divided by $GpuTime.
 * Yields 0 when $GpuTime is 0.
 */
static uint64_t
bdw__render_basic__avg_gpu_core_frequency__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: $GpuCoreClocks 1000000000 UMUL $GpuTime UDIV */
   const uint64_t scaled_clocks =
      bdw__render_basic__gpu_core_clocks__read(perf, query, results) * 1000000000;
   const uint64_t gpu_time = bdw__render_basic__gpu_time__read(perf, query, results);

   if (gpu_time == 0)
      return 0;

   return scaled_clocks / gpu_time;
}

/* Render Metrics Basic set :: AVG GPU Core Frequency
 *
 * Alias for the hsw "__max" definition of the same metric.
 * NOTE(review): presumably the callback supplying the metric's upper bound,
 * going by the suffix - the target is defined outside this section.
 */
#define bdw__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched
 *
 * Returns accumulated counter A1 unmodified.
 */
static uint64_t
bdw__render_basic__vs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 1 READ */
   return results->accumulator[query->a_offset + 1];
}

/* Render Metrics Basic set :: HS Threads Dispatched
 *
 * Returns accumulated counter A2 unmodified.
 */
static uint64_t
bdw__render_basic__hs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 2 READ */
   return results->accumulator[query->a_offset + 2];
}

/* Render Metrics Basic set :: DS Threads Dispatched
 *
 * Returns accumulated counter A3 unmodified.
 */
static uint64_t
bdw__render_basic__ds_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 3 READ */
   return results->accumulator[query->a_offset + 3];
}

/* Render Metrics Basic set :: GS Threads Dispatched
 *
 * Alias for an existing hsw reader rather than a dedicated function.
 * NOTE(review): the target is the hsw *VS* threads reader, not a GS one;
 * presumably the generator reuses it because both evaluate the same
 * equation - confirm against the hsw definition, which is outside this
 * section.
 */
#define bdw__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched
 *
 * Returns accumulated counter A6 unmodified.
 */
static uint64_t
bdw__render_basic__ps_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 6 READ */
   return results->accumulator[query->a_offset + 6];
}

/* Render Metrics Basic set :: CS Threads Dispatched
 *
 * Returns accumulated counter A4 unmodified.
 */
static uint64_t
bdw__render_basic__cs_threads__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: A 4 READ */
   return results->accumulator[query->a_offset + 4];
}

/* Render Metrics Basic set :: GPU Busy
 *
 * Counter A0 times 100, divided in floating point by $GpuCoreClocks.
 * Yields 0 when $GpuCoreClocks is 0.
 */
static float
bdw__render_basic__gpu_busy__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 0];
   /* The multiplication stays in integer arithmetic; only the product is
    * converted to floating point.
    */
   const double scaled = (double)(raw * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: EU Active
 *
 * Counter A7 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__eu_active__read(UNUSED struct intel_perf_config *perf,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 7];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: EU Stall
 *
 * Counter A8 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__eu_stall__read(UNUSED struct intel_perf_config *perf,
                                  const struct intel_perf_query_info *query,
                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 8 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 8];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: EU Both FPU Pipes Active
 *
 * Counter A9 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__eu_fpu_both_active__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: A 9 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 9];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: VS FPU0 Pipe Active
 *
 * Counter A10 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__vs_fpu0_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 10 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 10];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: VS FPU1 Pipe Active
 *
 * Counter A11 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__vs_fpu1_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 11 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 11];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: VS Send Pipe Active
 *
 * Counter A12 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__vs_send_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 12 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 12];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: PS FPU0 Pipe Active
 *
 * Counter A15 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__ps_fpu0_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 15 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 15];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: PS FPU1 Pipe Active
 *
 * Counter A16 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__ps_fpu1_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 16 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 16];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: PS Send Pipeline Active
 *
 * Counter A17 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__ps_send_active__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 17 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 17];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: FS Both FPU Active
 *
 * Counter A18 is integer-divided by $EuCoresTotalCount, multiplied by 100,
 * then divided in floating point by $GpuCoreClocks.  Either division yields
 * 0 when its divisor is 0.
 */
static float
bdw__render_basic__ps_eu_both_fpu_active__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   /* RPN equation: A 18 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->a_offset + 18];
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   if (eu_count != 0)
      per_eu = raw / eu_count;

   const double scaled = (double)(per_eu * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: Sampler 0 Busy
 *
 * Counter B0 times 100, divided in floating point by $GpuCoreClocks.
 * Yields 0 when $GpuCoreClocks is 0.
 */
static float
bdw__render_basic__sampler0_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->b_offset + 0];
   const double scaled = (double)(raw * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: Sampler 1 Busy
 *
 * Counter B1 times 100, divided in floating point by $GpuCoreClocks.
 * Yields 0 when $GpuCoreClocks is 0.
 */
static float
bdw__render_basic__sampler1_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: B 1 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->b_offset + 1];
   const double scaled = (double)(raw * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: Samplers Busy
 *
 * Returns the larger of the $Sampler0Busy and $Sampler1Busy readings.
 */
static float
bdw__render_basic__samplers_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: $Sampler0Busy $Sampler1Busy FMAX */
   const double sampler0 = bdw__render_basic__sampler0_busy__read(perf, query, results);
   const double sampler1 = bdw__render_basic__sampler1_busy__read(perf, query, results);
   double busiest = sampler1;

   /* Sampler 0 wins only when strictly greater; otherwise sampler 1's value
    * is reported.
    */
   if (sampler0 > sampler1)
      busiest = sampler0;

   return busiest;
}

/* Render Metrics Basic set :: Sampler 0 Bottleneck
 *
 * Counter B2 times 100, divided in floating point by $GpuCoreClocks.
 * Yields 0 when $GpuCoreClocks is 0.
 */
static float
bdw__render_basic__sampler0_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: B 2 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->b_offset + 2];
   const double scaled = (double)(raw * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: Sampler 1 Bottleneck
 *
 * Counter B3 times 100, divided in floating point by $GpuCoreClocks.
 * Yields 0 when $GpuCoreClocks is 0.
 */
static float
bdw__render_basic__sampler1_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: B 3 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw = results->accumulator[query->b_offset + 3];
   const double scaled = (double)(raw * 100);
   const double core_clocks =
      (double)bdw__render_basic__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0)
      percent = scaled / core_clocks;

   return percent;
}

/* Render Metrics Basic set :: Samples Written
 *
 * Returns counter A26 multiplied by 4.
 */
static uint64_t
bdw__render_basic__samples_written__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 26 READ 4 UMUL */
   const uint64_t raw = results->accumulator[query->a_offset + 26];

   return raw * 4;
}

/* Render Metrics Basic set :: Samples Blended
 *
 * Returns counter A27 multiplied by 4.
 */
static uint64_t
bdw__render_basic__samples_blended__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 27 READ 4 UMUL */
   const uint64_t raw = results->accumulator[query->a_offset + 27];

   return raw * 4;
}

/* Render Metrics Basic set :: Sampler Texels
 *
 * Returns counter A28 multiplied by 4.
 */
static uint64_t
bdw__render_basic__sampler_texels__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 28 READ 4 UMUL */
   const uint64_t raw = results->accumulator[query->a_offset + 28];

   return raw * 4;
}

/* Render Metrics Basic set :: Sampler Texels Misses
 *
 * Returns counter A29 multiplied by 4.
 */
static uint64_t
bdw__render_basic__sampler_texel_misses__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: A 29 READ 4 UMUL */
   const uint64_t raw = results->accumulator[query->a_offset + 29];

   return raw * 4;
}

/* Render Metrics Basic set :: Sampler Cache Misses
 *
 * Returns the sum of counters B4 and B5, multiplied by 8.
 */
static uint64_t
bdw__render_basic__sampler_l1_misses__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ B 5 READ UADD 8 UMUL */
   const uint64_t first = results->accumulator[query->b_offset + 4];
   const uint64_t second = results->accumulator[query->b_offset + 5];

   return (first + second) * 8;
}

/* Render Metrics Basic set :: Rasterized Pixels
 *
 * RPN equation: A 21 READ 4 UMUL
 *
 * Returns the accumulator entry at a_offset + 21 multiplied by 4.
 */
static uint64_t
bdw__render_basic__rasterized_pixels__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 21];

   return raw_count * 4;
}

/* Render Metrics Basic set :: Early Hi-Depth Test Fails
 *
 * RPN equation: A 22 READ 4 UMUL
 *
 * Returns the accumulator entry at a_offset + 22 multiplied by 4.
 */
static uint64_t
bdw__render_basic__hi_depth_test_fails__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 22];

   return raw_count * 4;
}

/* Render Metrics Basic set :: Early Depth Test Fails
 *
 * RPN equation: A 23 READ 4 UMUL
 *
 * Returns the accumulator entry at a_offset + 23 multiplied by 4.
 */
static uint64_t
bdw__render_basic__early_depth_test_fails__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 23];

   return raw_count * 4;
}

/* Render Metrics Basic set :: Samples Killed in FS
 *
 * RPN equation: A 24 READ 4 UMUL
 *
 * Returns the accumulator entry at a_offset + 24 multiplied by 4.
 */
static uint64_t
bdw__render_basic__samples_killed_in_ps__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 24];

   return raw_count * 4;
}

/* Render Metrics Basic set :: Pixels Failing Tests
 *
 * RPN equation: A 25 READ 4 UMUL
 *
 * Returns the accumulator entry at a_offset + 25 multiplied by 4.
 */
static uint64_t
bdw__render_basic__pixels_failing_post_ps_tests__read(UNUSED struct intel_perf_config *perf,
                                                      const struct intel_perf_query_info *query,
                                                      const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 25];

   return raw_count * 4;
}

/* Render Metrics Basic set :: SLM Bytes Read
 *
 * RPN equation: A 30 READ 64 UMUL
 *
 * Returns the accumulator entry at a_offset + 30 multiplied by 64.
 */
static uint64_t
bdw__render_basic__slm_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 30];

   return raw_count * 64;
}

/* Render Metrics Basic set :: SLM Bytes Written
 *
 * RPN equation: A 31 READ 64 UMUL
 *
 * Returns the accumulator entry at a_offset + 31 multiplied by 64.
 */
static uint64_t
bdw__render_basic__slm_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 31];

   return raw_count * 64;
}

/* Render Metrics Basic set :: Shader Memory Accesses
 *
 * RPN equation: A 32 READ
 *
 * Returns the accumulator entry at a_offset + 32 unchanged.
 */
static uint64_t
bdw__render_basic__shader_memory_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   return (uint64_t) results->accumulator[query->a_offset + 32];
}

/* Render Metrics Basic set :: Shader Atomic Memory Accesses
 *
 * RPN equation: A 34 READ
 *
 * Returns the accumulator entry at a_offset + 34 unchanged.
 */
static uint64_t
bdw__render_basic__shader_atomics__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   return (uint64_t) results->accumulator[query->a_offset + 34];
}

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC
 *
 * RPN equation: $SamplerL1Misses $ShaderMemoryAccesses UADD
 *
 * Sum of the values produced by the sampler_l1_misses and
 * shader_memory_accesses readers of this metric set.
 */
static uint64_t
bdw__render_basic__l3_lookups__read(UNUSED struct intel_perf_config *perf,
                                    const struct intel_perf_query_info *query,
                                    const struct intel_perf_query_result *results)
{
   const uint64_t sampler_l1_misses =
      bdw__render_basic__sampler_l1_misses__read(perf, query, results);
   const uint64_t shader_accesses =
      bdw__render_basic__shader_memory_accesses__read(perf, query, results);

   return sampler_l1_misses + shader_accesses;
}

/* Render Metrics Basic set :: L3 Misses
 *
 * No dedicated reader is emitted for this counter: the name is a
 * preprocessor alias for hsw__compute_extended__typed_atomics0__read.
 * NOTE(review): presumably the two counters evaluate the same equation,
 * which is why an hsw reader is reused here -- confirm against the
 * generator input.
 */
#define bdw__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput
 *
 * RPN equation: $SamplerL1Misses 64 UMUL
 *
 * Value of the sampler_l1_misses reader multiplied by 64.
 */
static uint64_t
bdw__render_basic__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   const uint64_t sampler_l1_misses =
      bdw__render_basic__sampler_l1_misses__read(perf, query, results);

   return sampler_l1_misses * 64;
}

/* Render Metrics Basic set :: L3 Shader Throughput
 *
 * RPN equation: A 30 READ  A 31 READ $ShaderMemoryAccesses UADD UADD 64 UMUL
 *
 * Adds the accumulator entries at a_offset + 30 and a_offset + 31 to the
 * value of the shader_memory_accesses reader, then multiplies by 64.
 */
static uint64_t
bdw__render_basic__l3_shader_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t a30 = results->accumulator[query->a_offset + 30];
   const uint64_t a31 = results->accumulator[query->a_offset + 31];
   const uint64_t shader_accesses =
      bdw__render_basic__shader_memory_accesses__read(perf, query, results);

   return (a30 + (a31 + shader_accesses)) * 64;
}

/* Render Metrics Basic set :: Shader Barrier Messages
 *
 * No dedicated reader is emitted for this counter: the name is a
 * preprocessor alias for hsw__render_basic__early_depth_test_fails__read.
 * NOTE(review): the target is named after a different counter; presumably
 * both evaluate the same equation -- confirm against the generator input.
 */
#define bdw__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput
 *
 * RPN equation: B 6 READ B 7 READ UADD 64 UMUL
 *
 * Adds the accumulator entries at b_offset + 6 and b_offset + 7, then
 * multiplies the sum by 64.
 */
static uint64_t
bdw__render_basic__gti_vf_throughput__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->b_offset + 6];
   const uint64_t second = results->accumulator[query->b_offset + 7];

   return (first + second) * 64;
}

/* Render Metrics Basic set :: GTI Depth Throughput
 *
 * RPN equation: C 0 READ C 1 READ UADD 64 UMUL
 *
 * Adds the accumulator entries at c_offset + 0 and c_offset + 1, then
 * multiplies the sum by 64.
 */
static uint64_t
bdw__render_basic__gti_depth_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->c_offset + 0];
   const uint64_t second = results->accumulator[query->c_offset + 1];

   return (first + second) * 64;
}

/* Render Metrics Basic set :: GTI RCC Throughput
 *
 * RPN equation: C 2 READ C 3 READ UADD 64 UMUL
 *
 * Adds the accumulator entries at c_offset + 2 and c_offset + 3, then
 * multiplies the sum by 64.
 */
static uint64_t
bdw__render_basic__gti_rcc_throughput__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->c_offset + 2];
   const uint64_t second = results->accumulator[query->c_offset + 3];

   return (first + second) * 64;
}

/* Render Metrics Basic set :: GTI L3 Throughput
 *
 * No dedicated reader is emitted for this counter: the name is a
 * preprocessor alias for hsw__render_basic__gti_l3_throughput__read.
 */
#define bdw__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput
 *
 * RPN equation: C 5 READ 64 UMUL
 *
 * Returns the accumulator entry at c_offset + 5 multiplied by 64.
 */
static uint64_t
bdw__render_basic__gti_hdc_lookups_throughput__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 5];

   return raw_count * 64;
}

/* Render Metrics Basic set :: GTI Read Throughput
 *
 * RPN equation: C 6 READ 64 UMUL
 *
 * Returns the accumulator entry at c_offset + 6 multiplied by 64.
 */
static uint64_t
bdw__render_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 6];

   return raw_count * 64;
}

/* Render Metrics Basic set :: GTI Write Throughput
 *
 * No dedicated reader is emitted for this counter: the name is a
 * preprocessor alias for hsw__render_basic__gti_write_throughput__read.
 */
#define bdw__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck
 *
 * RPN equation: $Sampler0Bottleneck $Sampler1Bottleneck FMAX
 *
 * Returns the larger of the values produced by the sampler0_bottleneck
 * and sampler1_bottleneck readers, compared as doubles.
 */
static float
bdw__render_basic__sampler_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   const double sampler0 = bdw__render_basic__sampler0_bottleneck__read(perf, query, results);
   const double sampler1 = bdw__render_basic__sampler1_bottleneck__read(perf, query, results);

   /* sampler1 is the result unless sampler0 compares strictly greater. */
   if (sampler0 > sampler1)
      return sampler0;

   return sampler1;
}

/* Compute Metrics Basic set: the counters in this run have no reader of
 * their own. Each name is a preprocessor alias that forwards to an
 * existing hsw__render_basic__* or bdw__render_basic__* function.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define bdw__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define bdw__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define bdw__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency
 * (the __max companion of the __read alias above)
 */
#define bdw__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define bdw__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define bdw__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define bdw__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define bdw__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched
 * NOTE(review): forwards to the hsw VS-threads reader rather than a bdw GS
 * reader; presumably both evaluate the same equation -- confirm against
 * the generator input.
 */
#define bdw__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define bdw__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define bdw__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define bdw__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define bdw__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define bdw__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define bdw__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define bdw__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU AVG IPC Rate
 *
 * RPN equation: A 9 READ  A 10 READ  A 11 READ UADD  A 9 READ USUB FDIV 1 FADD
 *
 * With A9, A10 and A11 the accumulator entries at a_offset + 9, + 10 and
 * + 11, this evaluates A9 / (A10 + A11 - A9) + 1. The subtraction is done
 * in unsigned 64-bit arithmetic before the conversion to double, and the
 * quotient is taken as 0 when the divisor is 0.
 */
static float
bdw__compute_basic__eu_avg_ipc_rate__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   const uint64_t a9 = results->accumulator[query->a_offset + 9];
   const uint64_t a10 = results->accumulator[query->a_offset + 10];
   const uint64_t a11 = results->accumulator[query->a_offset + 11];
   const uint64_t remainder = (a10 + a11) - a9;

   const double dividend = (double) a9;
   const double divisor = (double) remainder;
   double quotient = 0;

   if (divisor)
      quotient = dividend / divisor;

   const double rate = quotient + 1;

   return rate;
}

/* Compute Metrics Basic set :: EU Send Pipe Active
 *
 * No dedicated reader is emitted for this counter: the name is a
 * preprocessor alias for bdw__render_basic__vs_send_active__read.
 */
#define bdw__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy
 *
 * RPN equation: A 13 READ 8 UMUL $EuCoresTotalCount UDIV $EuThreadsCount UDIV 100 UMUL  $GpuCoreClocks FDIV
 *
 * Integer stage: the accumulator entry at a_offset + 13 is multiplied by 8,
 * divided by perf->sys_vars.n_eus, divided by
 * perf->sys_vars.eu_threads_count, then multiplied by 100. Floating-point
 * stage: that value is divided by the GPU core clocks reading. Every
 * division evaluates to 0 when its divisor is 0.
 */
static float
bdw__compute_basic__eu_thread_occupancy__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->a_offset + 13];
   const uint64_t n_eus = perf->sys_vars.n_eus;
   const uint64_t eu_threads = perf->sys_vars.eu_threads_count;
   uint64_t value = raw_count * 8;

   if (n_eus)
      value /= n_eus;
   else
      value = 0;

   if (eu_threads)
      value /= eu_threads;
   else
      value = 0;

   value *= 100;

   const double numerator = (double) value;
   const double clocks = bdw__compute_basic__gpu_core_clocks__read(perf, query, results);
   double occupancy = 0;

   if (clocks)
      occupancy = numerator / clocks;

   return occupancy;
}

/* Compute Metrics Basic set: the counters in this run have no reader of
 * their own. Each name is a preprocessor alias that forwards to the
 * reader of another metric set.
 */

/* Compute Metrics Basic set :: Rasterized Pixels */
#define bdw__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define bdw__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define bdw__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define bdw__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define bdw__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define bdw__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define bdw__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define bdw__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define bdw__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define bdw__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define bdw__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define bdw__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define bdw__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define bdw__compute_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages
 * NOTE(review): the target is named after a different hsw counter;
 * presumably both evaluate the same equation -- confirm against the
 * generator input.
 */
#define bdw__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read
 *
 * RPN equation: B 0 READ B 1 READ B 2 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums the accumulator entries at b_offset + 0, + 1 and + 2, then
 * multiplies the sum by (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
bdw__compute_basic__typed_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->b_offset + 0];
   const uint64_t second = results->accumulator[query->b_offset + 1];
   const uint64_t third = results->accumulator[query->b_offset + 2];
   const uint64_t slice_factor = perf->sys_vars.n_eu_slices * 64;

   return (first + (second + third)) * slice_factor;
}

/* Compute Metrics Basic set :: Typed Bytes Written
 *
 * RPN equation: B 3 READ B 4 READ B 5 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums the accumulator entries at b_offset + 3, + 4 and + 5, then
 * multiplies the sum by (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
bdw__compute_basic__typed_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->b_offset + 3];
   const uint64_t second = results->accumulator[query->b_offset + 4];
   const uint64_t third = results->accumulator[query->b_offset + 5];
   const uint64_t slice_factor = perf->sys_vars.n_eu_slices * 64;

   return (first + (second + third)) * slice_factor;
}

/* Compute Metrics Basic set :: Untyped Bytes Read
 *
 * RPN equation: B 6 READ B 7 READ C 0 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums the accumulator entries at b_offset + 6, b_offset + 7 and
 * c_offset + 0, then multiplies the sum by
 * (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
bdw__compute_basic__untyped_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->b_offset + 6];
   const uint64_t second = results->accumulator[query->b_offset + 7];
   const uint64_t third = results->accumulator[query->c_offset + 0];
   const uint64_t slice_factor = perf->sys_vars.n_eu_slices * 64;

   return (first + (second + third)) * slice_factor;
}

/* Compute Metrics Basic set :: Untyped Writes
 *
 * RPN equation: C 1 READ C 2 READ C 3 READ UADD UADD $EuSlicesTotalCount 64 UMUL UMUL
 *
 * Sums the accumulator entries at c_offset + 1, + 2 and + 3, then
 * multiplies the sum by (perf->sys_vars.n_eu_slices * 64).
 */
static uint64_t
bdw__compute_basic__untyped_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t first = results->accumulator[query->c_offset + 1];
   const uint64_t second = results->accumulator[query->c_offset + 2];
   const uint64_t third = results->accumulator[query->c_offset + 3];
   const uint64_t slice_factor = perf->sys_vars.n_eu_slices * 64;

   return (first + (second + third)) * slice_factor;
}

/* Compute Metrics Basic set :: GTI Read Throughput
 *
 * Preprocessor alias; no dedicated reader is emitted.
 * NOTE(review): forwards to the hsw "GTI L3 throughput" reader, not a
 * "read throughput" one; presumably both evaluate the same equation --
 * confirm against the generator input.
 */
#define bdw__compute_basic__gti_read_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput
 *
 * Preprocessor alias; no dedicated reader is emitted.
 * NOTE(review): forwards to the render-basic "GTI HDC lookups throughput"
 * reader; presumably both evaluate the same equation -- confirm against
 * the generator input.
 */
#define bdw__compute_basic__gti_write_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics for 3D Pipeline Profile: the counters in this run have no
 * reader of their own. Each name is a preprocessor alias that forwards to
 * an existing hsw__render_basic__* or bdw__render_basic__* function.
 */

/* Render Metrics for 3D Pipeline Profile :: GPU Time Elapsed */
#define bdw__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics for 3D Pipeline Profile :: GPU Core Clocks */
#define bdw__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define bdw__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency
 * (the __max companion of the __read alias above)
 */
#define bdw__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics for 3D Pipeline Profile :: GPU Busy */
#define bdw__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics for 3D Pipeline Profile :: VS Threads Dispatched */
#define bdw__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: HS Threads Dispatched */
#define bdw__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics for 3D Pipeline Profile :: DS Threads Dispatched */
#define bdw__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics for 3D Pipeline Profile :: GS Threads Dispatched
 * NOTE(review): forwards to the hsw VS-threads reader rather than a bdw GS
 * reader; presumably both evaluate the same equation -- confirm against
 * the generator input.
 */
#define bdw__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: FS Threads Dispatched */
#define bdw__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics for 3D Pipeline Profile :: CS Threads Dispatched */
#define bdw__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics for 3D Pipeline Profile :: EU Active */
#define bdw__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics for 3D Pipeline Profile :: EU Stall */
#define bdw__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics for 3D Pipeline Profile :: Rasterized Pixels */
#define bdw__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define bdw__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Early Depth Test Fails */
#define bdw__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Samples Killed in FS */
#define bdw__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics for 3D Pipeline Profile :: Pixels Failing Tests */
#define bdw__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics for 3D Pipeline Profile :: Samples Written */
#define bdw__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics for 3D Pipeline Profile :: Samples Blended */
#define bdw__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Accesses
 *
 * RPN equation: A 28 READ
 *
 * Returns the accumulator entry at a_offset + 28 unchanged.
 */
static uint64_t
bdw__render_pipe_profile__sampler_accesses__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   return (uint64_t) results->accumulator[query->a_offset + 28];
}

/* Render Metrics for 3D Pipeline Profile: the counters in this run have no
 * reader of their own. Each name is a preprocessor alias that forwards to
 * a render-basic reader.
 */

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels */
#define bdw__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels Misses */
#define bdw__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Read */
#define bdw__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Written */
#define bdw__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics for 3D Pipeline Profile :: Shader Memory Accesses */
#define bdw__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define bdw__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics for 3D Pipeline Profile :: L3 Shader Throughput */
#define bdw__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics for 3D Pipeline Profile :: Shader Barrier Messages
 * NOTE(review): the target is named after a different hsw counter;
 * presumably both evaluate the same equation -- confirm against the
 * generator input.
 */
#define bdw__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: VF Bottleneck
 *
 * RPN equation: C 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at c_offset + 0 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__vf_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 0];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile: the four counters in this run
 * have no reader of their own; each name is a preprocessor alias.
 * NOTE(review): every target here is a render-basic sampler busy or
 * bottleneck reader, i.e. named after a different counter. Presumably the
 * aliased pairs evaluate the same equation -- confirm against the
 * generator input.
 */

/* Render Metrics for 3D Pipeline Profile :: VS Bottleneck */
#define bdw__render_pipe_profile__vs_bottleneck__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics for 3D Pipeline Profile :: HS Bottleneck */
#define bdw__render_pipe_profile__hs_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics for 3D Pipeline Profile :: DS Bottleneck */
#define bdw__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: GS Bottleneck */
#define bdw__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: SO Bottleneck
 *
 * RPN equation: B 4 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at b_offset + 4 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__so_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->b_offset + 4];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: Clipper Bottleneck
 *
 * RPN equation: B 5 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at b_offset + 5 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__cl_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->b_offset + 5];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: Strip-Fans Bottleneck
 *
 * RPN equation: B 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at b_offset + 6 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__sf_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->b_offset + 6];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: Hi-Depth Bottleneck
 *
 * RPN equation: C 6 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at c_offset + 6 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__hi_depth_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 6];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: Early Depth Bottleneck
 *
 * RPN equation: B 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at b_offset + 7 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__early_depth_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                                       const struct intel_perf_query_info *query,
                                                       const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->b_offset + 7];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: BC Bottleneck
 *
 * RPN equation: C 7 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at c_offset + 7 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__bc_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 7];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: HS Stall
 *
 * RPN equation: C 1 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Multiplies the accumulator entry at c_offset + 1 by 100 (in unsigned
 * 64-bit arithmetic) and divides it, as a double, by the GPU core clocks
 * reading. Returns 0 when that reading is 0.
 */
static float
bdw__render_pipe_profile__hs_stall__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   const uint64_t raw_count = results->accumulator[query->c_offset + 1];
   const double numerator = (double) (raw_count * 100);
   const double clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;

   if (clocks)
      ratio = numerator / clocks;

   return ratio;
}

/* Render Metrics for 3D Pipeline Profile :: DS Stall
 *
 * Takes C counter 2 from the accumulator, multiplies it by 100 and divides
 * the result by this metric set's GPU core clocks. Returns 0 when the core
 * clock count is zero so no division by zero takes place.
 */
static float
bdw__render_pipe_profile__ds_stall__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 2 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->c_offset + 2];
   const uint64_t scaled_count = raw_count * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: SO Stall
 *
 * Takes C counter 3 from the accumulator, multiplies it by 100 and divides
 * the result by this metric set's GPU core clocks. Returns 0 when the core
 * clock count is zero so no division by zero takes place.
 */
static float
bdw__render_pipe_profile__so_stall__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 3 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->c_offset + 3];
   const uint64_t scaled_count = raw_count * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: CL Stall
 *
 * Takes C counter 4 from the accumulator, multiplies it by 100 and divides
 * the result by this metric set's GPU core clocks. Returns 0 when the core
 * clock count is zero so no division by zero takes place.
 */
static float
bdw__render_pipe_profile__cl_stall__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->c_offset + 4];
   const uint64_t scaled_count = raw_count * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* Render Metrics for 3D Pipeline Profile :: SF Stall
 *
 * Takes C counter 5 from the accumulator, multiplies it by 100 and divides
 * the result by this metric set's GPU core clocks. Returns 0 when the core
 * clock count is zero so no division by zero takes place.
 */
static float
bdw__render_pipe_profile__sf_stall__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->c_offset + 5];
   const uint64_t scaled_count = raw_count * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__render_pipe_profile__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/*
 * Memory Reads Distribution metric set: aliased readers.
 *
 * Every #define in this run gives a bdw__memory_reads__* name to another
 * reader identifier instead of providing a body of its own; the macro
 * simply expands to the target name.
 *
 * NOTE(review): several targets are named after a different metric (for
 * example gs_threads -> hsw__render_basic__vs_threads__read, and the Gti*
 * readers -> hsw__compute_extended__* / hsw__memory_reads__*). Presumably
 * the generator reuses an earlier function with an identical equation, so
 * the target's name need not describe the metric -- confirm against the
 * generator before "fixing" any of these by hand.
 */

/* Memory Reads Distribution metric set :: GPU Time Elapsed */
#define bdw__memory_reads__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Reads Distribution metric set :: GPU Core Clocks */
#define bdw__memory_reads__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Reads Distribution metric set :: AVG GPU Core Frequency */
#define bdw__memory_reads__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Reads Distribution metric set :: AVG GPU Core Frequency (the __max variant, not __read) */
#define bdw__memory_reads__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Reads Distribution metric set :: GPU Busy */
#define bdw__memory_reads__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Reads Distribution metric set :: VS Threads Dispatched */
#define bdw__memory_reads__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Reads Distribution metric set :: HS Threads Dispatched */
#define bdw__memory_reads__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Reads Distribution metric set :: DS Threads Dispatched */
#define bdw__memory_reads__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Reads Distribution metric set :: GS Threads Dispatched */
#define bdw__memory_reads__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Reads Distribution metric set :: FS Threads Dispatched */
#define bdw__memory_reads__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Reads Distribution metric set :: CS Threads Dispatched */
#define bdw__memory_reads__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Reads Distribution metric set :: EU Active */
#define bdw__memory_reads__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Reads Distribution metric set :: EU Stall */
#define bdw__memory_reads__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Reads Distribution metric set :: Rasterized Pixels */
#define bdw__memory_reads__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Reads Distribution metric set :: Early Hi-Depth Test Fails */
#define bdw__memory_reads__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Reads Distribution metric set :: Early Depth Test Fails */
#define bdw__memory_reads__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metric set :: Samples Killed in FS */
#define bdw__memory_reads__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Reads Distribution metric set :: Pixels Failing Tests */
#define bdw__memory_reads__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Reads Distribution metric set :: Samples Written */
#define bdw__memory_reads__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Reads Distribution metric set :: Samples Blended */
#define bdw__memory_reads__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Reads Distribution metric set :: Sampler Texels */
#define bdw__memory_reads__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Reads Distribution metric set :: Sampler Texels Misses */
#define bdw__memory_reads__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Reads Distribution metric set :: SLM Bytes Read */
#define bdw__memory_reads__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Reads Distribution metric set :: SLM Bytes Written */
#define bdw__memory_reads__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Reads Distribution metric set :: Shader Memory Accesses */
#define bdw__memory_reads__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Reads Distribution metric set :: Shader Atomic Memory Accesses */
#define bdw__memory_reads__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Reads Distribution metric set :: Shader Barrier Messages */
#define bdw__memory_reads__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metric set :: L3 Shader Throughput */
#define bdw__memory_reads__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Reads Distribution metric set :: GtiCmdStreamerMemoryReads */
#define bdw__memory_reads__gti_cmd_streamer_memory_reads__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Reads Distribution metric set :: GtiRsMemoryReads */
#define bdw__memory_reads__gti_rs_memory_reads__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Reads Distribution metric set :: GtiVfMemoryReads */
#define bdw__memory_reads__gti_vf_memory_reads__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Memory Reads Distribution metric set :: GtiRccMemoryReads */
#define bdw__memory_reads__gti_rcc_memory_reads__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Reads Distribution metric set :: GtiMscMemoryReads */
#define bdw__memory_reads__gti_msc_memory_reads__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Reads Distribution metric set :: GtiHizMemoryReads */
#define bdw__memory_reads__gti_hiz_memory_reads__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Reads Distribution metric set :: GtiStcMemoryReads */
#define bdw__memory_reads__gti_stc_memory_reads__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Reads Distribution metric set :: GtiRczMemoryReads */
#define bdw__memory_reads__gti_rcz_memory_reads__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Reads Distribution metric set :: GtiMemoryReads */
#define bdw__memory_reads__gti_memory_reads__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Reads Distribution metric set :: GtiL3Bank0Reads */
#define bdw__memory_reads__gti_l3_bank0_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Reads Distribution metric set :: GtiL3Bank1Reads */
#define bdw__memory_reads__gti_l3_bank1_reads__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Reads Distribution metric set :: GtiL3Bank2Reads */
#define bdw__memory_reads__gti_l3_bank2_reads__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Reads Distribution metric set :: GtiL3Bank3Reads */
#define bdw__memory_reads__gti_l3_bank3_reads__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Reads Distribution metric set :: GtiL3Reads
 *
 * Returns the sum of the four GtiL3Bank{0,1,2,3}Reads readers of this
 * metric set. The addition is plain unsigned 64-bit arithmetic.
 */
static uint64_t
bdw__memory_reads__gti_l3_reads__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: $GtiL3Bank0Reads $GtiL3Bank1Reads $GtiL3Bank2Reads $GtiL3Bank3Reads UADD UADD UADD */
   uint64_t total = bdw__memory_reads__gti_l3_bank2_reads__read(perf, query, results) + bdw__memory_reads__gti_l3_bank3_reads__read(perf, query, results);

   total = bdw__memory_reads__gti_l3_bank1_reads__read(perf, query, results) + total;
   total = bdw__memory_reads__gti_l3_bank0_reads__read(perf, query, results) + total;

   return total;
}

/* Memory Reads Distribution metric set :: GtiRingAccesses
 *
 * Returns twice the value of C counter 3 from the accumulator.
 */
static uint64_t
bdw__memory_reads__gti_ring_accesses__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 3 READ 2 UMUL */
   const uint64_t c3_count = results->accumulator[query->c_offset + 3];

   return c3_count * 2;
}

/*
 * Memory Writes Distribution metric set: aliased readers.
 *
 * Every #define in this run gives a bdw__memory_writes__* name to another
 * reader identifier instead of providing a body of its own; the macro
 * simply expands to the target name.
 *
 * NOTE(review): several targets are named after a different metric or a
 * different metric set (for example the Gti*Writes readers expand to
 * hsw__compute_extended__* / hsw__memory_reads__* names, and GtiL3Writes
 * expands to bdw__memory_reads__gti_l3_reads__read). Presumably the
 * generator reuses an earlier function with an identical equation, so the
 * target's name need not describe the metric -- confirm against the
 * generator before "fixing" any of these by hand.
 */

/* Memory Writes Distribution metric set :: GPU Time Elapsed */
#define bdw__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution metric set :: GPU Core Clocks */
#define bdw__memory_writes__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Writes Distribution metric set :: AVG GPU Core Frequency */
#define bdw__memory_writes__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Writes Distribution metric set :: AVG GPU Core Frequency (the __max variant, not __read) */
#define bdw__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution metric set :: GPU Busy */
#define bdw__memory_writes__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Writes Distribution metric set :: VS Threads Dispatched */
#define bdw__memory_writes__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Writes Distribution metric set :: HS Threads Dispatched */
#define bdw__memory_writes__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Writes Distribution metric set :: DS Threads Dispatched */
#define bdw__memory_writes__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Writes Distribution metric set :: GS Threads Dispatched */
#define bdw__memory_writes__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution metric set :: FS Threads Dispatched */
#define bdw__memory_writes__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Writes Distribution metric set :: CS Threads Dispatched */
#define bdw__memory_writes__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Writes Distribution metric set :: EU Active */
#define bdw__memory_writes__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Writes Distribution metric set :: EU Stall */
#define bdw__memory_writes__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Writes Distribution metric set :: Rasterized Pixels */
#define bdw__memory_writes__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Writes Distribution metric set :: Early Hi-Depth Test Fails */
#define bdw__memory_writes__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution metric set :: Early Depth Test Fails */
#define bdw__memory_writes__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metric set :: Samples Killed in FS */
#define bdw__memory_writes__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution metric set :: Pixels Failing Tests */
#define bdw__memory_writes__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Writes Distribution metric set :: Samples Written */
#define bdw__memory_writes__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Writes Distribution metric set :: Samples Blended */
#define bdw__memory_writes__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Writes Distribution metric set :: Sampler Texels */
#define bdw__memory_writes__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Writes Distribution metric set :: Sampler Texels Misses */
#define bdw__memory_writes__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Writes Distribution metric set :: SLM Bytes Read */
#define bdw__memory_writes__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Writes Distribution metric set :: SLM Bytes Written */
#define bdw__memory_writes__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Writes Distribution metric set :: Shader Memory Accesses */
#define bdw__memory_writes__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Writes Distribution metric set :: Shader Atomic Memory Accesses */
#define bdw__memory_writes__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Writes Distribution metric set :: L3 Shader Throughput */
#define bdw__memory_writes__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Writes Distribution metric set :: Shader Barrier Messages */
#define bdw__memory_writes__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metric set :: GtiCmdStreamerMemoryWrites */
#define bdw__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metric set :: GtiSoMemoryWrites */
#define bdw__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metric set :: GtiRccMemoryWrites */
#define bdw__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metric set :: GtiMscMemoryWrites */
#define bdw__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metric set :: GtiHizMemoryWrites */
#define bdw__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metric set :: GtiStcMemoryWrites */
#define bdw__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metric set :: GtiRczMemoryWrites */
#define bdw__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metric set :: GtiMemoryWrites */
#define bdw__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metric set :: GtiL3Bank0Writes */
#define bdw__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metric set :: GtiL3Bank1Writes */
#define bdw__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metric set :: GtiL3Bank2Writes */
#define bdw__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metric set :: GtiL3Bank3Writes */
#define bdw__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metric set :: GtiL3Writes */
#define bdw__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metric set :: GtiRingAccesses */
#define bdw__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * Compute Metrics Extended metric set: aliased readers.
 *
 * Every #define in this run gives a bdw__compute_extended__* name to
 * another reader identifier instead of providing a body of its own; the
 * macro simply expands to the target name.
 *
 * NOTE(review): several targets are named after a different metric (for
 * example eu_a64_untyped_reads0 -> hsw__compute_extended__eu_urb_atomics0__read
 * and typed_reads0 -> hsw__render_basic__gpu_core_clocks__read). Presumably
 * the generator reuses an earlier function with an identical equation, so
 * the target's name need not describe the metric -- confirm against the
 * generator before "fixing" any of these by hand.
 */

/* Compute Metrics Extended metric set :: GPU Time Elapsed */
#define bdw__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended metric set :: GPU Core Clocks */
#define bdw__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metric set :: AVG GPU Core Frequency */
#define bdw__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended metric set :: AVG GPU Core Frequency (the __max variant, not __read) */
#define bdw__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended metric set :: CS Threads Dispatched */
#define bdw__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended metric set :: EU Active */
#define bdw__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended metric set :: EU Stall */
#define bdw__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended metric set :: EU Both FPU Pipes Active */
#define bdw__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended metric set :: EU FPU0 Pipe Active */
#define bdw__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended metric set :: EU FPU1 Pipe Active */
#define bdw__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended metric set :: EU AVG IPC Rate */
#define bdw__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended metric set :: EU Send Pipe Active */
#define bdw__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended metric set :: EU Thread Occupancy */
#define bdw__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended metric set :: Sampler Texels */
#define bdw__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended metric set :: Sampler Texels Misses */
#define bdw__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended metric set :: SLM Bytes Read */
#define bdw__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended metric set :: SLM Bytes Written */
#define bdw__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended metric set :: Shader Memory Accesses */
#define bdw__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended metric set :: Shader Atomic Memory Accesses */
#define bdw__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended metric set :: L3 Shader Throughput */
#define bdw__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended metric set :: Shader Barrier Messages */
#define bdw__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended metric set :: EuUntypedReads0 */
#define bdw__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended metric set :: EuTypedReads0 */
#define bdw__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended metric set :: EuUntypedWrites0 */
#define bdw__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended metric set :: EuTypedWrites0 */
#define bdw__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended metric set :: EuUntypedAtomics0 */
#define bdw__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended metric set :: EuTypedAtomics0 */
#define bdw__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended metric set :: EuA64UntypedReads0 */
#define bdw__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended metric set :: EuA64UntypedWrites0 */
#define bdw__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended metric set :: Typed Reads 0 */
#define bdw__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metric set :: Typed Writes 0 */
#define bdw__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended metric set :: Untyped Reads 0 */
#define bdw__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended metric set :: Untyped Writes 0 */
#define bdw__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended metric set :: Typed Atomics 0 */
#define bdw__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended metric set :: TypedReadsPerCacheLine */
#define bdw__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended metric set :: TypedWritesPerCacheLine */
#define bdw__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended metric set :: UntypedReadsPerCacheLine
 *
 * Adds the EuUntypedReads0 and EuA64UntypedReads0 readers of this metric
 * set and divides the sum by the UntypedReads0 reader. Returns 0 when the
 * divisor is zero so no division by zero takes place.
 */
static float
bdw__compute_extended__untyped_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                          const struct intel_perf_query_info *query,
                                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: $EuUntypedReads0 $EuA64UntypedReads0 UADD $UntypedReads0 FDIV */
   const uint64_t eu_reads = bdw__compute_extended__eu_untyped_reads0__read(perf, query, results) + bdw__compute_extended__eu_a64_untyped_reads0__read(perf, query, results);
   const double numerator = eu_reads;
   const double untyped_reads = bdw__compute_extended__untyped_reads0__read(perf, query, results);
   double ratio = 0;

   if (untyped_reads != 0.0)
      ratio = numerator / untyped_reads;

   return ratio;
}

/* Compute Metrics Extended metric set :: UntypedWritesPerCacheLine
 *
 * Adds the EuUntypedWrites0 and EuA64UntypedWrites0 readers of this metric
 * set and divides the sum by the UntypedWrites0 reader. Returns 0 when the
 * divisor is zero so no division by zero takes place.
 */
static float
bdw__compute_extended__untyped_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                           const struct intel_perf_query_info *query,
                                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: $EuUntypedWrites0 $EuA64UntypedWrites0 UADD $UntypedWrites0 FDIV */
   const uint64_t eu_writes = bdw__compute_extended__eu_untyped_writes0__read(perf, query, results) + bdw__compute_extended__eu_a64_untyped_writes0__read(perf, query, results);
   const double numerator = eu_writes;
   const double untyped_writes = bdw__compute_extended__untyped_writes0__read(perf, query, results);
   double ratio = 0;

   if (untyped_writes != 0.0)
      ratio = numerator / untyped_writes;

   return ratio;
}

/* Compute Metrics Extended metric set :: TypedAtomicsPerCacheLine
 *
 * No body of its own: the name expands to the hsw compute_extended reader
 * with the same metric name.
 */
#define bdw__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * Compute Metrics L3 Cache metric set: aliased readers.
 *
 * Every #define in this run gives a bdw__compute_l3_cache__* name to
 * another reader identifier instead of providing a body of its own; the
 * macro simply expands to the target name.
 *
 * NOTE(review): some targets are named after a different metric (for
 * example gs_threads -> hsw__render_basic__vs_threads__read and
 * fpu0_active -> bdw__render_basic__vs_fpu0_active__read). Presumably the
 * generator reuses an earlier function with an identical equation, so the
 * target's name need not describe the metric -- confirm against the
 * generator before "fixing" any of these by hand.
 */

/* Compute Metrics L3 Cache metric set :: GPU Time Elapsed */
#define bdw__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache metric set :: GPU Core Clocks */
#define bdw__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache metric set :: AVG GPU Core Frequency */
#define bdw__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache metric set :: AVG GPU Core Frequency (the __max variant, not __read) */
#define bdw__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache metric set :: GPU Busy */
#define bdw__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache metric set :: VS Threads Dispatched */
#define bdw__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metric set :: HS Threads Dispatched */
#define bdw__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache metric set :: DS Threads Dispatched */
#define bdw__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache metric set :: GS Threads Dispatched */
#define bdw__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metric set :: FS Threads Dispatched */
#define bdw__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache metric set :: CS Threads Dispatched */
#define bdw__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache metric set :: EU Active */
#define bdw__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache metric set :: EU Stall */
#define bdw__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache metric set :: EU Both FPU Pipes Active */
#define bdw__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Pipe Active */
#define bdw__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Pipe Active */
#define bdw__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache metric set :: EU AVG IPC Rate */
#define bdw__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache metric set :: EU Send Pipe Active */
#define bdw__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Hybrid Instruction
 *
 * Divides A counter 13 by perf->sys_vars.n_eus (integer division),
 * multiplies by 100, then divides by this metric set's GPU core clocks.
 * Either division yields 0 instead of being performed when its divisor is
 * zero.
 */
static float
bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read(UNUSED struct intel_perf_config *perf,
                                                        const struct intel_perf_query_info *query,
                                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 13 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->a_offset + 13];
   const uint64_t eu_total = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   /* UDIV: truncating unsigned division, done before the scale by 100. */
   if (eu_total != 0)
      per_eu = raw_count / eu_total;

   const uint64_t scaled_count = per_eu * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__compute_l3_cache__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* Compute Metrics L3 Cache metric set :: EU FPU1 Hybrid Instruction
 *
 * Divides A counter 14 by perf->sys_vars.n_eus (integer division),
 * multiplies by 100, then divides by this metric set's GPU core clocks.
 * Either division yields 0 instead of being performed when its divisor is
 * zero.
 */
static float
bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read(UNUSED struct intel_perf_config *perf,
                                                        const struct intel_perf_query_info *query,
                                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: A 14 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->a_offset + 14];
   const uint64_t eu_total = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   /* UDIV: truncating unsigned division, done before the scale by 100. */
   if (eu_total != 0)
      per_eu = raw_count / eu_total;

   const uint64_t scaled_count = per_eu * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__compute_l3_cache__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/*
 * Ternary/binary FPU instruction readers: each name below expands to a
 * bdw__render_basic__ps_* reader rather than having a body of its own.
 *
 * NOTE(review): the target names (ps_fpu0_active, ps_send_active, ...)
 * describe different metrics than the aliases do. Presumably the targets
 * share the same equation and were reused by the generator -- confirm
 * before editing.
 */

/* Compute Metrics L3 Cache metric set :: EU FPU0 Ternary Instruction */
#define bdw__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Ternary Instruction */
#define bdw__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Binary Instruction */
#define bdw__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Binary Instruction */
#define bdw__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Move Instruction
 *
 * Divides A counter 19 by perf->sys_vars.n_eus (integer division),
 * multiplies by 100, then divides by this metric set's GPU core clocks.
 * Either division yields 0 instead of being performed when its divisor is
 * zero.
 */
static float
bdw__compute_l3_cache__eu_move_fpu0_instruction__read(UNUSED struct intel_perf_config *perf,
                                                      const struct intel_perf_query_info *query,
                                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 19 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->a_offset + 19];
   const uint64_t eu_total = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   /* UDIV: truncating unsigned division, done before the scale by 100. */
   if (eu_total != 0)
      per_eu = raw_count / eu_total;

   const uint64_t scaled_count = per_eu * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__compute_l3_cache__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* Compute Metrics L3 Cache metric set :: EU FPU1 Move Instruction
 *
 * Divides A counter 20 by perf->sys_vars.n_eus (integer division),
 * multiplies by 100, then divides by this metric set's GPU core clocks.
 * Either division yields 0 instead of being performed when its divisor is
 * zero.
 */
static float
bdw__compute_l3_cache__eu_move_fpu1_instruction__read(UNUSED struct intel_perf_config *perf,
                                                      const struct intel_perf_query_info *query,
                                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 20 READ $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t raw_count = results->accumulator[query->a_offset + 20];
   const uint64_t eu_total = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;

   /* UDIV: truncating unsigned division, done before the scale by 100. */
   if (eu_total != 0)
      per_eu = raw_count / eu_total;

   const uint64_t scaled_count = per_eu * 100;
   const double numerator = scaled_count;
   const double core_clocks = bdw__compute_l3_cache__gpu_core_clocks__read(perf, query, results);
   double percent = 0;

   if (core_clocks != 0.0)
      percent = numerator / core_clocks;

   return percent;
}

/* The counters from here through "Shader Atomic Memory Accesses" have no
 * reader body in this metric set.  Each name is a preprocessor alias that
 * expands to the Render Basic reader carrying the same counter name.
 */

/* Compute Metrics L3 Cache metric set :: Rasterized Pixels */
#define bdw__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache metric set :: Early Hi-Depth Test Fails */
#define bdw__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: Early Depth Test Fails */
#define bdw__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: Samples Killed in FS */
#define bdw__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache metric set :: Pixels Failing Tests */
#define bdw__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache metric set :: Samples Written */
#define bdw__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache metric set :: Samples Blended */
#define bdw__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache metric set :: Sampler Texels */
#define bdw__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache metric set :: Sampler Texels Misses */
#define bdw__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache metric set :: SLM Bytes Read */
#define bdw__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache metric set :: SLM Bytes Written */
#define bdw__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache metric set :: Shader Memory Accesses */
#define bdw__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache metric set :: Shader Atomic Memory Accesses */
#define bdw__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache metric set :: L3 Accesses
 *
 * RPN equation: C 0 READ C 1 READ B 2 READ B 3 READ C 2 READ C 3 READ
 *               B 6 READ B 7 READ UADD UADD UADD UADD UADD UADD UADD 2 UMUL
 *
 * Adds up eight accumulator slots (C0-C3 and B2, B3, B6, B7) and doubles
 * the total.  All arithmetic is unsigned 64-bit, so the order in which the
 * terms are added has no effect on the result.
 */
static uint64_t
bdw__compute_l3_cache__l3_accesses__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   const uint64_t c0 = results->accumulator[query->c_offset + 0];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t c3 = results->accumulator[query->c_offset + 3];

   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t b7 = results->accumulator[query->b_offset + 7];

   const uint64_t c_total = c0 + c1 + c2 + c3;
   const uint64_t b_total = b2 + b3 + b6 + b7;

   return (c_total + b_total) * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Misses
 *
 * RPN equation: C 4 READ C 5 READ UADD
 *
 * Returns the unsigned sum of accumulator slots c_offset + 4 and
 * c_offset + 5.
 */
static uint64_t
bdw__compute_l3_cache__l3_misses__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   return c4 + c5;
}

/* Compute Metrics L3 Cache metric set :: L3 Shader Throughput
 *
 * No reader body is emitted for this counter; the name is a preprocessor
 * alias for the Render Basic reader of the same counter name.
 */
#define bdw__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache metric set :: L3 Total Throughput
 *
 * RPN equation: $L3Accesses 64 UMUL
 *
 * Multiplies this metric set's L3 Accesses reading by 64 using unsigned
 * 64-bit arithmetic.
 */
static uint64_t
bdw__compute_l3_cache__l3_total_throughput__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   const uint64_t accesses =
      bdw__compute_l3_cache__l3_accesses__read(perf, query, results);

   return accesses * 64;
}

/* Compute Metrics L3 Cache metric set :: Shader Barrier Messages
 *
 * No reader body is emitted for this counter; the name is a preprocessor
 * alias for an existing reader.
 *
 * NOTE(review): the alias target is named after a different counter and a
 * different platform prefix (hsw "early depth test fails").  Presumably the
 * generator reuses any earlier reader with an identical equation - confirm
 * against gen_perf.py.
 */
#define bdw__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 Accesses
 *
 * RPN equation: C 0 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot c_offset + 0.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t c0 = results->accumulator[query->c_offset + 0];

   return c0 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 01 Accesses
 *
 * RPN equation: C 1 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot c_offset + 1.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank01_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t c1 = results->accumulator[query->c_offset + 1];

   return c1 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 02 Accesses
 *
 * RPN equation: B 2 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot b_offset + 2.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank02_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t b2 = results->accumulator[query->b_offset + 2];

   return b2 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 03 Accesses
 *
 * RPN equation: B 3 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot b_offset + 3.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank03_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   return b3 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 10 Accesses
 *
 * RPN equation: C 2 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot c_offset + 2.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   return c2 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 11 Accesses
 *
 * Unlike the neighbouring bank readers, this one has no body here; the name
 * is a preprocessor alias for a Memory Reads reader.
 *
 * NOTE(review): the alias target is named after a different counter
 * ("gti_ring_accesses").  By analogy with the other bank readers and the
 * L3 Accesses equation, the expected equation is presumably
 * "C 3 READ 2 UMUL" - confirm against the alias target's definition.
 */
#define bdw__compute_l3_cache__l3_bank11_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 12 Accesses
 *
 * RPN equation: B 6 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot b_offset + 6.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank12_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   return b6 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 13 Accesses
 *
 * RPN equation: B 7 READ 2 UMUL
 *
 * Returns twice the value held in accumulator slot b_offset + 7.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank13_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   const uint64_t b7 = results->accumulator[query->b_offset + 7];

   return b7 * 2;
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 IC Accesses
 *
 * RPN equation: B 0 READ B 1 READ UADD 2 UMUL $L3Bank00Accesses UMIN
 *
 * Doubles the sum of accumulator slots b_offset + 0 and b_offset + 1, then
 * clamps the result so it never exceeds the L3 Bank 00 Accesses reading.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_ic_accesses__read(UNUSED struct intel_perf_config *perf,
                                                   const struct intel_perf_query_info *query,
                                                   const struct intel_perf_query_result *results)
{
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t ic_accesses = (b0 + b1) * 2;

   /* MIN() expands each argument more than once, so the reader is called
    * once up front and only plain locals are handed to the macro.
    */
   const uint64_t bank_accesses =
      bdw__compute_l3_cache__l3_bank00_accesses__read(perf, query, results);

   return MIN(ic_accesses, bank_accesses);
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 IC Hits
 *
 * RPN equation: B 1 READ 2 UMUL $L3Bank00IcAccesses UMIN
 *
 * Doubles accumulator slot b_offset + 1, then clamps the result so it never
 * exceeds the L3 Bank 00 IC Accesses reading.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank00_ic_hits__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t ic_hits = b1 * 2;

   /* MIN() expands each argument more than once, so the reader is called
    * once up front and only plain locals are handed to the macro.
    */
   const uint64_t ic_accesses =
      bdw__compute_l3_cache__l3_bank00_ic_accesses__read(perf, query, results);

   return MIN(ic_hits, ic_accesses);
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 10 IC Accesses
 *
 * RPN equation: B 4 READ B 5 READ UADD 2 UMUL $L3Bank10Accesses UMIN
 *
 * Doubles the sum of accumulator slots b_offset + 4 and b_offset + 5, then
 * clamps the result so it never exceeds the L3 Bank 10 Accesses reading.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_ic_accesses__read(UNUSED struct intel_perf_config *perf,
                                                   const struct intel_perf_query_info *query,
                                                   const struct intel_perf_query_result *results)
{
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t ic_accesses = (b4 + b5) * 2;

   /* MIN() expands each argument more than once, so the reader is called
    * once up front and only plain locals are handed to the macro.
    */
   const uint64_t bank_accesses =
      bdw__compute_l3_cache__l3_bank10_accesses__read(perf, query, results);

   return MIN(ic_accesses, bank_accesses);
}

/* Compute Metrics L3 Cache metric set :: L3 Bank 10 IC Hits
 *
 * RPN equation: B 5 READ 2 UMUL $L3Bank10IcAccesses UMIN
 *
 * Doubles accumulator slot b_offset + 5, then clamps the result so it never
 * exceeds the L3 Bank 10 IC Accesses reading.
 */
static uint64_t
bdw__compute_l3_cache__l3_bank10_ic_hits__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t ic_hits = b5 * 2;

   /* MIN() expands each argument more than once, so the reader is called
    * once up front and only plain locals are handed to the macro.
    */
   const uint64_t ic_accesses =
      bdw__compute_l3_cache__l3_bank10_ic_accesses__read(perf, query, results);

   return MIN(ic_hits, ic_accesses);
}

/* Compute Metrics L3 Cache metric set :: GTI L3 Throughput
 *
 * RPN equation: C 4 READ C 5 READ UADD 64 UMUL
 *
 * Adds accumulator slots c_offset + 4 and c_offset + 5 and multiplies the
 * sum by 64, all in unsigned 64-bit arithmetic.
 */
static uint64_t
bdw__compute_l3_cache__gti_l3_throughput__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   return (c4 + c5) * 64;
}

/* The two GTI throughput counters below have no reader body in this metric
 * set; each name is a preprocessor alias for a Render Basic reader of the
 * same counter name.  Note that the write-throughput alias points at an
 * hsw-prefixed reader while the read-throughput alias points at a
 * bdw-prefixed one.
 */

/* Compute Metrics L3 Cache metric set :: GTI Read Throughput */
#define bdw__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache metric set :: GTI Write Throughput */
#define bdw__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Data Port Reads Coalescing metric set: aliased readers.
 *
 * None of the counters from here through "Data Port 0 to L3 Data Writes"
 * has a reader body of its own.  Each name is a preprocessor alias that
 * expands to a reader identifier belonging to another metric set.
 *
 * NOTE(review): several alias targets are named after different counters or
 * carry the hsw prefix (e.g. gs_threads -> hsw vs_threads, eu_hdc0_reads32_b
 * -> hsw eu_untyped_atomics0).  Presumably the generator reuses any earlier
 * reader with an identical equation regardless of its name - confirm
 * against gen_perf.py.
 */

/* Data Port Reads Coalescing metric set :: GPU Time Elapsed */
#define bdw__data_port_reads_coalescing__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Data Port Reads Coalescing metric set :: GPU Core Clocks */
#define bdw__data_port_reads_coalescing__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Data Port Reads Coalescing metric set :: AVG GPU Core Frequency */
#define bdw__data_port_reads_coalescing__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Data Port Reads Coalescing metric set :: AVG GPU Core Frequency */
#define bdw__data_port_reads_coalescing__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Data Port Reads Coalescing metric set :: VS Threads Dispatched */
#define bdw__data_port_reads_coalescing__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Data Port Reads Coalescing metric set :: HS Threads Dispatched */
#define bdw__data_port_reads_coalescing__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Data Port Reads Coalescing metric set :: DS Threads Dispatched */
#define bdw__data_port_reads_coalescing__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Data Port Reads Coalescing metric set :: GS Threads Dispatched */
#define bdw__data_port_reads_coalescing__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Data Port Reads Coalescing metric set :: FS Threads Dispatched */
#define bdw__data_port_reads_coalescing__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Data Port Reads Coalescing metric set :: CS Threads Dispatched */
#define bdw__data_port_reads_coalescing__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Data Port Reads Coalescing metric set :: EU Active */
#define bdw__data_port_reads_coalescing__eu_active__read \
   bdw__render_basic__eu_active__read

/* Data Port Reads Coalescing metric set :: EU Stall */
#define bdw__data_port_reads_coalescing__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Data Port Reads Coalescing metric set :: EU Both FPU Pipes Active */
#define bdw__data_port_reads_coalescing__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Data Port Reads Coalescing metric set :: EU FPU0 Pipe Active */
#define bdw__data_port_reads_coalescing__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Data Port Reads Coalescing metric set :: EU FPU1 Pipe Active */
#define bdw__data_port_reads_coalescing__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Data Port Reads Coalescing metric set :: EU AVG IPC Rate */
#define bdw__data_port_reads_coalescing__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Data Port Reads Coalescing metric set :: EU Send Pipe Active */
#define bdw__data_port_reads_coalescing__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Data Port Reads Coalescing metric set :: EU Thread Occupancy */
#define bdw__data_port_reads_coalescing__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Data Port Reads Coalescing metric set :: Sampler Texels */
#define bdw__data_port_reads_coalescing__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Data Port Reads Coalescing metric set :: Sampler Texels Misses */
#define bdw__data_port_reads_coalescing__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Data Port Reads Coalescing metric set :: SLM Bytes Read */
#define bdw__data_port_reads_coalescing__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Data Port Reads Coalescing metric set :: SLM Bytes Written */
#define bdw__data_port_reads_coalescing__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Data Port Reads Coalescing metric set :: Shader Memory Accesses */
#define bdw__data_port_reads_coalescing__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Data Port Reads Coalescing metric set :: Shader Atomic Memory Accesses */
#define bdw__data_port_reads_coalescing__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Data Port Reads Coalescing metric set :: L3 Shader Throughput */
#define bdw__data_port_reads_coalescing__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Data Port Reads Coalescing metric set :: Shader Barrier Messages */
#define bdw__data_port_reads_coalescing__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Data Port Reads Coalescing metric set :: EU to Data Port 0 Reads 32 */
#define bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Data Port Reads Coalescing metric set :: EU to Data Port 0 Reads 64 */
#define bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Data Port Reads Coalescing metric set :: EU to Data Port 0 Reads 128 */
#define bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Data Port Reads Coalescing metric set :: EU to Data Port 0 Reads 256 */
#define bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read \
   hsw__compute_extended__gpu_clocks__read

/* Data Port Reads Coalescing metric set :: Data Port 0 to L3 Data Reads */
#define bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Data Port Reads Coalescing metric set :: Data Port 0 to L3 Data Writes */
#define bdw__data_port_reads_coalescing__hdc0_l3_data_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Data Port Reads Coalescing metric set :: All Data Port 0 Reads from L3
 *
 * RPN equation: C 3 READ C 2 READ USUB
 *
 * Returns accumulator slot c_offset + 3 minus slot c_offset + 2.  The
 * subtraction is done in uint64_t, so it wraps around if the C2 value is
 * larger than the C3 value.
 */
static uint64_t
bdw__data_port_reads_coalescing__hdc0_l3_reads__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   return c3 - c2;
}

/* Data Port Reads Coalescing metric set :: All Data Port 0 Writes to L3
 *
 * No reader body is emitted for this counter; the name is a preprocessor
 * alias for an existing reader.
 *
 * NOTE(review): the alias target is named after a different counter and a
 * different platform prefix (hsw "gpu_core_clocks").  Presumably the
 * generator reuses any earlier reader with an identical equation - confirm
 * against gen_perf.py.
 */
#define bdw__data_port_reads_coalescing__hdc0_l3_writes__read \
   hsw__render_basic__gpu_core_clocks__read

/* Data Port Reads Coalescing metric set :: EuBytesReadPerCacheLine
 *
 * RPN equation: $EuHdc0Reads32B 32 UMUL $EuHdc0Reads64B 64 UMUL
 *               $EuHdc0Reads128B 128 UMUL $EuHdc0Reads256B 256 UMUL
 *               UADD UADD UADD $Hdc0L3DataReads FDIV
 *
 * Weights each of the four "EU to Data Port 0 Reads" readings by its size
 * (32, 64, 128, 256), adds the weighted values in unsigned 64-bit
 * arithmetic, and divides the total by the "Data Port 0 to L3 Data Reads"
 * reading in floating point.  Returns 0 when that divisor is zero.
 */
static float
bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                                    const struct intel_perf_query_info *query,
                                                                    const struct intel_perf_query_result *results)
{
   const uint64_t bytes_from_32 =
      bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read(perf, query, results) * 32;
   const uint64_t bytes_from_64 =
      bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read(perf, query, results) * 64;
   const uint64_t bytes_from_128 =
      bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read(perf, query, results) * 128;
   const uint64_t bytes_from_256 =
      bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read(perf, query, results) * 256;

   const uint64_t total_bytes =
      bytes_from_32 + bytes_from_64 + bytes_from_128 + bytes_from_256;

   const double l3_data_reads =
      bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read(perf, query, results);

   /* FDIV: floating-point division, skipped when the divisor is zero. */
   if (l3_data_reads == 0)
      return 0;

   return (double)total_bytes / l3_data_reads;
}

/* Data Port Reads Coalescing metric set :: EuDataReadsPerCacheLine
 *
 * RPN equation: $EuBytesReadPerCacheLine 64 FDIV
 *
 * Divides the EuBytesReadPerCacheLine reading by the constant 64.  The
 * division is carried out in double precision and the result is narrowed
 * to float on return.
 */
static float
bdw__data_port_reads_coalescing__eu_data_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                                    const struct intel_perf_query_info *query,
                                                                    const struct intel_perf_query_result *results)
{
   const double bytes_per_line =
      bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read(perf, query, results);

   /* The divisor is a non-zero constant, so no zero guard is needed. */
   return bytes_per_line / 64.0;
}

/* Data Port Writes Coalescing metric set: aliased readers.
 *
 * None of the counters from here through "EU to Data Port 0 Writes 32B"
 * has a reader body of its own.  Each name is a preprocessor alias that
 * expands to a reader identifier belonging to another metric set.
 *
 * NOTE(review): several alias targets are named after different counters or
 * carry the hsw prefix (e.g. gs_threads -> hsw vs_threads,
 * eu_hdc0_writes32_b -> hsw eu_untyped_reads0).  Presumably the generator
 * reuses any earlier reader with an identical equation regardless of its
 * name - confirm against gen_perf.py.
 */

/* Data Port Writes Coalescing metric set :: GPU Time Elapsed */
#define bdw__data_port_writes_coalescing__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Data Port Writes Coalescing metric set :: GPU Core Clocks */
#define bdw__data_port_writes_coalescing__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Data Port Writes Coalescing metric set :: AVG GPU Core Frequency */
#define bdw__data_port_writes_coalescing__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Data Port Writes Coalescing metric set :: AVG GPU Core Frequency */
#define bdw__data_port_writes_coalescing__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Data Port Writes Coalescing metric set :: VS Threads Dispatched */
#define bdw__data_port_writes_coalescing__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Data Port Writes Coalescing metric set :: HS Threads Dispatched */
#define bdw__data_port_writes_coalescing__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Data Port Writes Coalescing metric set :: DS Threads Dispatched */
#define bdw__data_port_writes_coalescing__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Data Port Writes Coalescing metric set :: GS Threads Dispatched */
#define bdw__data_port_writes_coalescing__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Data Port Writes Coalescing metric set :: FS Threads Dispatched */
#define bdw__data_port_writes_coalescing__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Data Port Writes Coalescing metric set :: CS Threads Dispatched */
#define bdw__data_port_writes_coalescing__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Data Port Writes Coalescing metric set :: EU Active */
#define bdw__data_port_writes_coalescing__eu_active__read \
   bdw__render_basic__eu_active__read

/* Data Port Writes Coalescing metric set :: EU Stall */
#define bdw__data_port_writes_coalescing__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Data Port Writes Coalescing metric set :: EU Both FPU Pipes Active */
#define bdw__data_port_writes_coalescing__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Data Port Writes Coalescing metric set :: EU FPU0 Pipe Active */
#define bdw__data_port_writes_coalescing__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Data Port Writes Coalescing metric set :: EU FPU1 Pipe Active */
#define bdw__data_port_writes_coalescing__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Data Port Writes Coalescing metric set :: EU AVG IPC Rate */
#define bdw__data_port_writes_coalescing__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Data Port Writes Coalescing metric set :: EU Send Pipe Active */
#define bdw__data_port_writes_coalescing__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Data Port Writes Coalescing metric set :: EU Thread Occupancy */
#define bdw__data_port_writes_coalescing__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Data Port Writes Coalescing metric set :: Sampler Texels */
#define bdw__data_port_writes_coalescing__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Data Port Writes Coalescing metric set :: Sampler Texels Misses */
#define bdw__data_port_writes_coalescing__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Data Port Writes Coalescing metric set :: SLM Bytes Read */
#define bdw__data_port_writes_coalescing__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Data Port Writes Coalescing metric set :: SLM Bytes Written */
#define bdw__data_port_writes_coalescing__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Data Port Writes Coalescing metric set :: Shader Memory Accesses */
#define bdw__data_port_writes_coalescing__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Data Port Writes Coalescing metric set :: Shader Atomic Memory Accesses */
#define bdw__data_port_writes_coalescing__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Data Port Writes Coalescing metric set :: L3 Shader Throughput */
#define bdw__data_port_writes_coalescing__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Data Port Writes Coalescing metric set :: Shader Barrier Messages */
#define bdw__data_port_writes_coalescing__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 32B */
#define bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 64B
 *
 * RPN equation: B 1 READ B 4 READ UADD
 *
 * Returns the unsigned sum of accumulator slots b_offset + 1 and
 * b_offset + 4.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read(UNUSED struct intel_perf_config *perf,
                                                           const struct intel_perf_query_info *query,
                                                           const struct intel_perf_query_result *results)
{
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   return b1 + b4;
}

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 96B
 *
 * RPN equation: B 2 READ 2 UDIV
 *
 * Returns half (integer division) of accumulator slot b_offset + 2.
 *
 * NOTE(review): the generated label for this reader read "Writes 64B",
 * duplicating the previous counter's label; the symbol name (writes96_b)
 * and the 96-byte weight applied in eu_bytes_written_per_cache_line
 * indicate 96B.  Presumably a copy-paste slip in the metric description -
 * confirm.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read(UNUSED struct intel_perf_config *perf,
                                                           const struct intel_perf_query_info *query,
                                                           const struct intel_perf_query_result *results)
{
   const uint64_t b2 = results->accumulator[query->b_offset + 2];

   return b2 / 2;
}

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 128
 *
 * RPN equation: B 3 READ 2 UDIV
 *
 * Returns half (integer division) of accumulator slot b_offset + 3.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read(UNUSED struct intel_perf_config *perf,
                                                            const struct intel_perf_query_info *query,
                                                            const struct intel_perf_query_result *results)
{
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   return b3 / 2;
}

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 192B
 *
 * RPN equation: B 6 READ 2 UDIV
 *
 * Returns half (integer division) of accumulator slot b_offset + 6.
 *
 * NOTE(review): the generated label for this reader read "Writes 128",
 * duplicating the previous counter's label; the symbol name (writes192_b)
 * and the 192-byte weight applied in eu_bytes_written_per_cache_line
 * indicate 192B.  Presumably a copy-paste slip in the metric description -
 * confirm.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read(UNUSED struct intel_perf_config *perf,
                                                            const struct intel_perf_query_info *query,
                                                            const struct intel_perf_query_result *results)
{
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   return b6 / 2;
}

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 128B SIMD16
 *
 * No reader body is emitted for this counter; the name is a preprocessor
 * alias for an hsw Compute Extended reader named after a different counter.
 *
 * NOTE(review): the generated label here read "Writes 64B", which matches
 * neither the symbol name (writes128_b_simd16) nor the 128-byte weight
 * applied to this counter in eu_bytes_written_per_cache_line.  Presumably a
 * copy-paste slip in the metric description - confirm.
 */
#define bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Data Port Writes Coalescing metric set :: EU to Data Port 0 Writes 256B
 *
 * RPN equation: B 7 READ 2 UDIV
 *
 * Returns half (integer division) of accumulator slot b_offset + 7.
 */
static uint64_t
bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read(UNUSED struct intel_perf_config *perf,
                                                                   const struct intel_perf_query_info *query,
                                                                   const struct intel_perf_query_result *results)
{
   const uint64_t b7 = results->accumulator[query->b_offset + 7];

   return b7 / 2;
}

/* The four Data Port 0 / L3 counters below have no reader body in this
 * metric set; each name is a preprocessor alias for an existing reader.
 * "All Data Port 0 Reads from L3" reuses the Data Port Reads Coalescing
 * reader of the same counter name.
 *
 * NOTE(review): the other three alias targets are hsw-prefixed readers
 * named after different counters.  Presumably the generator reuses any
 * earlier reader with an identical equation - confirm against gen_perf.py.
 */

/* Data Port Writes Coalescing metric set :: Data Port 0 to L3 Data Reads */
#define bdw__data_port_writes_coalescing__hdc0_l3_data_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Data Port Writes Coalescing metric set :: Data Port 0 to L3 Data Writes */
#define bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Data Port Writes Coalescing metric set :: All Data Port 0 Reads from L3 */
#define bdw__data_port_writes_coalescing__hdc0_l3_reads__read \
   bdw__data_port_reads_coalescing__hdc0_l3_reads__read

/* Data Port Writes Coalescing metric set :: All Data Port 0 Writes to L3 */
#define bdw__data_port_writes_coalescing__hdc0_l3_writes__read \
   hsw__render_basic__gpu_core_clocks__read

/* Data Port Writes Coalescing metric set :: EuBytesWrittenPerCacheLine
 *
 * RPN equation: $EuHdc0Writes32B 32 UMUL $EuHdc0Writes64B 64 UMUL
 *               $EuHdc0Writes96B 96 UMUL $EuHdc0Writes128B 128 UMUL
 *               $EuHdc0Writes128BSimd16 128 UMUL
 *               $EuHdc0Writes256BSimd16 256 UMUL $EuHdc0Writes192B 192 UMUL
 *               UADD UADD UADD UADD UADD UADD $Hdc0L3DataWrites FDIV
 *
 * Weights each of the seven "EU to Data Port 0 Writes" readings by its
 * size, adds the weighted values in unsigned 64-bit arithmetic, and divides
 * the total by the "Data Port 0 to L3 Data Writes" reading in floating
 * point.  Returns 0 when that divisor is zero.
 */
static float
bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                                        const struct intel_perf_query_info *query,
                                                                        const struct intel_perf_query_result *results)
{
   const uint64_t bytes_from_32 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read(perf, query, results) * 32;
   const uint64_t bytes_from_64 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read(perf, query, results) * 64;
   const uint64_t bytes_from_96 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read(perf, query, results) * 96;
   const uint64_t bytes_from_128 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read(perf, query, results) * 128;
   const uint64_t bytes_from_128_simd16 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read(perf, query, results) * 128;
   const uint64_t bytes_from_256_simd16 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read(perf, query, results) * 256;
   const uint64_t bytes_from_192 =
      bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read(perf, query, results) * 192;

   const uint64_t total_bytes = bytes_from_32 + bytes_from_64 + bytes_from_96 +
                                bytes_from_128 + bytes_from_128_simd16 +
                                bytes_from_256_simd16 + bytes_from_192;

   const double l3_data_writes =
      bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read(perf, query, results);

   /* FDIV: floating-point division, skipped when the divisor is zero. */
   if (l3_data_writes == 0)
      return 0;

   return (double)total_bytes / l3_data_writes;
}

/* Data Port Writes Coalescing metric set :: EuDataWritesPerCacheLine
 *
 * RPN equation: $EuBytesWrittenPerCacheLine 64 FDIV
 *
 * Divides the EuBytesWrittenPerCacheLine reading by the constant 64.  The
 * division is carried out in double precision and the result is narrowed
 * to float on return.
 */
static float
bdw__data_port_writes_coalescing__eu_data_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                                      const struct intel_perf_query_info *query,
                                                                      const struct intel_perf_query_result *results)
{
   const double bytes_per_line =
      bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read(perf, query, results);

   /* The divisor is a non-zero constant, so no zero guard is needed. */
   return bytes_per_line / 64.0;
}

/* Metric set HDCAndSF: reader aliases.
 *
 * None of the counters in this run gets a reader of its own; each macro makes
 * the HDCAndSF name resolve to a reader published under another metric set's
 * name.  The target's name does not have to match the counter, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read and poly_data_ready ->
 * bdw__render_basic__sampler0_busy__read.
 * NOTE(review): presumably the generator (gen_perf.py, per the file header)
 * reuses an earlier reader whenever the equation is identical -- confirm
 * before treating a mismatched target name as a bug.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define bdw__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define bdw__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define bdw__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (max) */
#define bdw__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define bdw__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define bdw__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define bdw__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define bdw__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define bdw__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define bdw__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define bdw__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define bdw__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define bdw__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define bdw__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define bdw__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define bdw__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define bdw__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define bdw__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define bdw__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define bdw__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define bdw__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define bdw__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define bdw__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define bdw__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define bdw__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define bdw__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define bdw__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define bdw__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define bdw__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define bdw__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define bdw__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define bdw__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define bdw__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define bdw__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define bdw__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define bdw__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define bdw__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2)
 *
 * RPN equation: C 7 READ C 6 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (C7 - C6) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (c7 - c6) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1)
 *
 * RPN equation: C 5 READ C 4 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (C5 - C4) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (c5 - c4) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0)
 *
 * RPN equation: C 3 READ C 2 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (C3 - C2) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 */
static float
bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (c3 - c2) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s1.ss2)
 *
 * RPN equation: C 1 READ C 0 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (C1 - C0) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 *
 * The label used to read "(s0.ss2)", the same text as the shader02 reader.
 * The readers in this set are named shader<slice><subslice>, so shader12 is
 * labelled s1.ss2 here.
 */
static float
bdw__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c0 = results->accumulator[query->c_offset + 0];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (c1 - c0) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s1.ss1)
 *
 * RPN equation: B 7 READ B 6 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (B7 - B6) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 *
 * The label used to read "(s0.ss1)", the same text as the shader01 reader.
 * The readers in this set are named shader<slice><subslice>, so shader11 is
 * labelled s1.ss1 here.
 */
static float
bdw__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (b7 - b6) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: HDC stalled by L3 (s1.ss0)
 *
 * RPN equation: B 5 READ B 4 READ USUB 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns (B5 - B4) * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 *
 * The label used to read "(s0.ss0)", the same text as the shader00 reader.
 * The readers in this set are named shader<slice><subslice>, so shader10 is
 * labelled s1.ss0 here.
 */
static float
bdw__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   /* USUB then UMUL: plain unsigned 64-bit arithmetic, wrap-around included. */
   const uint64_t scaled_delta = (b5 - b4) * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_delta / core_clocks;
}

/* Metric set HDCAndSF :: SQ is full
 *
 * RPN equation: PERFCNT 0 READ 100 UMUL $GpuCoreClocks FDIV
 *
 * Returns PERFCNT0 * 100 divided by the GPU core clock count, or 0 when the
 * core clock count is zero.
 */
static float
bdw__hdc_and_sf__gt_request_queue_full__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t perfcnt0 = results->accumulator[query->perfcnt_offset + 0];

   /* UMUL: scaled in unsigned 64-bit before the floating-point division. */
   const uint64_t scaled_count = perfcnt0 * 100;

   const double core_clocks = bdw__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   if (!core_clocks)
      return 0;

   return (double)scaled_count / core_clocks;
}

/* Metric set L3_1: reader aliases.
 *
 * Every L3_1 counter in this run is a plain macro alias onto a reader
 * published under another metric set's name; no L3_1-specific reader function
 * appears here.  The target's name does not have to match the counter: the
 * four "Slice1 L3 Bank0/Bank1" counters resolve to sampler0_busy and
 * *_bottleneck readers, and gt_request_queue_full resolves to the HDCAndSF
 * reader.
 * NOTE(review): presumably the generator (gen_perf.py, per the file header)
 * reuses an earlier reader whenever the equation is identical -- confirm
 * before treating a mismatched target name as a bug.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define bdw__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define bdw__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define bdw__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (max) */
#define bdw__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define bdw__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define bdw__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define bdw__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define bdw__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define bdw__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define bdw__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define bdw__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define bdw__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define bdw__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define bdw__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define bdw__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define bdw__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define bdw__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define bdw__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define bdw__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define bdw__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define bdw__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define bdw__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define bdw__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define bdw__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define bdw__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define bdw__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define bdw__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define bdw__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define bdw__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define bdw__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define bdw__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define bdw__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define bdw__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define bdw__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define bdw__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define bdw__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice1 L3 Bank0 Stalled */
#define bdw__l3_1__l31_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice1 L3 Bank1 Stalled */
#define bdw__l3_1__l31_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice1 L3 Bank1 Active */
#define bdw__l3_1__l31_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice1 L3 Bank0 Active */
#define bdw__l3_1__l31_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define bdw__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_2: reader aliases.
 *
 * Every L3_2 counter in this run is a plain macro alias onto a reader
 * published under another metric set's name; no L3_2-specific reader function
 * appears here.  The target's name does not have to match the counter: the
 * four "Slice0 L3 Bank0/Bank1" counters resolve to sampler0_busy and
 * *_bottleneck readers, and gt_request_queue_full resolves to the HDCAndSF
 * reader.
 * NOTE(review): presumably the generator (gen_perf.py, per the file header)
 * reuses an earlier reader whenever the equation is identical -- confirm
 * before treating a mismatched target name as a bug.
 */

/* Metric set L3_2 :: GPU Time Elapsed */
#define bdw__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define bdw__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency */
#define bdw__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency (max) */
#define bdw__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define bdw__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define bdw__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define bdw__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define bdw__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define bdw__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define bdw__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define bdw__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define bdw__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define bdw__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define bdw__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define bdw__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define bdw__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define bdw__l3_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define bdw__l3_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define bdw__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define bdw__l3_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define bdw__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define bdw__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define bdw__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define bdw__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define bdw__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define bdw__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define bdw__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define bdw__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define bdw__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define bdw__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define bdw__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define bdw__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define bdw__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define bdw__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define bdw__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define bdw__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank0 Stalled */
#define bdw__l3_2__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank1 Stalled */
#define bdw__l3_2__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_2 :: Slice0 L3 Bank1 Active */
#define bdw__l3_2__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: Slice0 L3 Bank0 Active */
#define bdw__l3_2__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define bdw__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_3: reader aliases.
 *
 * Every L3_3 counter in this run is a plain macro alias onto a reader
 * published under another metric set's name; no L3_3-specific reader function
 * appears here.  The target's name does not have to match the counter: the
 * four "Slice0/Slice1 L3 Bank3" counters resolve to sampler0_busy and
 * *_bottleneck readers, and gt_request_queue_full resolves to the HDCAndSF
 * reader.
 * NOTE(review): presumably the generator (gen_perf.py, per the file header)
 * reuses an earlier reader whenever the equation is identical -- confirm
 * before treating a mismatched target name as a bug.
 */

/* Metric set L3_3 :: GPU Time Elapsed */
#define bdw__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_3 :: GPU Core Clocks */
#define bdw__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_3 :: AVG GPU Core Frequency */
#define bdw__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_3 :: AVG GPU Core Frequency (max) */
#define bdw__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_3 :: GPU Busy */
#define bdw__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_3 :: VS Threads Dispatched */
#define bdw__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_3 :: HS Threads Dispatched */
#define bdw__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_3 :: DS Threads Dispatched */
#define bdw__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_3 :: GS Threads Dispatched */
#define bdw__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_3 :: FS Threads Dispatched */
#define bdw__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_3 :: CS Threads Dispatched */
#define bdw__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_3 :: EU Active */
#define bdw__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_3 :: EU Stall */
#define bdw__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_3 :: EU Both FPU Pipes Active */
#define bdw__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_3 :: VS FPU0 Pipe Active */
#define bdw__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_3 :: VS FPU1 Pipe Active */
#define bdw__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_3 :: VS Send Pipe Active */
#define bdw__l3_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_3 :: PS FPU0 Pipe Active */
#define bdw__l3_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_3 :: PS FPU1 Pipe Active */
#define bdw__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_3 :: PS Send Pipeline Active */
#define bdw__l3_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_3 :: FS Both FPU Active */
#define bdw__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_3 :: Rasterized Pixels */
#define bdw__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_3 :: Early Hi-Depth Test Fails */
#define bdw__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_3 :: Early Depth Test Fails */
#define bdw__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Samples Killed in FS */
#define bdw__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_3 :: Pixels Failing Tests */
#define bdw__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_3 :: Samples Written */
#define bdw__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_3 :: Samples Blended */
#define bdw__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_3 :: Sampler Texels */
#define bdw__l3_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_3 :: Sampler Texels Misses */
#define bdw__l3_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_3 :: SLM Bytes Read */
#define bdw__l3_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_3 :: SLM Bytes Written */
#define bdw__l3_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_3 :: Shader Memory Accesses */
#define bdw__l3_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_3 :: Shader Atomic Memory Accesses */
#define bdw__l3_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_3 :: L3 Shader Throughput */
#define bdw__l3_3__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_3 :: Shader Barrier Messages */
#define bdw__l3_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled */
#define bdw__l3_3__l30_bank3_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_3 :: Slice1 L3 Bank3 Stalled */
#define bdw__l3_3__l31_bank3_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_3 :: Slice1 L3 Bank3 Active */
#define bdw__l3_3__l31_bank3_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Active */
#define bdw__l3_3__l30_bank3_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_3 :: SQ is full */
#define bdw__l3_3__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_4: reader aliases.
 *
 * Every L3_4 counter in this run is a plain macro alias onto a reader
 * published under another metric set's name; no L3_4-specific reader function
 * appears here.  The target's name does not have to match the counter: the
 * four "Slice0/Slice1 L3 Bank2" counters resolve to sampler0_busy and
 * *_bottleneck readers, and gt_request_queue_full resolves to the HDCAndSF
 * reader.
 * NOTE(review): presumably the generator (gen_perf.py, per the file header)
 * reuses an earlier reader whenever the equation is identical -- confirm
 * before treating a mismatched target name as a bug.
 */

/* Metric set L3_4 :: GPU Time Elapsed */
#define bdw__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_4 :: GPU Core Clocks */
#define bdw__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_4 :: AVG GPU Core Frequency */
#define bdw__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_4 :: AVG GPU Core Frequency (max) */
#define bdw__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_4 :: GPU Busy */
#define bdw__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_4 :: VS Threads Dispatched */
#define bdw__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_4 :: HS Threads Dispatched */
#define bdw__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_4 :: DS Threads Dispatched */
#define bdw__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_4 :: GS Threads Dispatched */
#define bdw__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_4 :: FS Threads Dispatched */
#define bdw__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_4 :: CS Threads Dispatched */
#define bdw__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_4 :: EU Active */
#define bdw__l3_4__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_4 :: EU Stall */
#define bdw__l3_4__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_4 :: EU Both FPU Pipes Active */
#define bdw__l3_4__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_4 :: VS FPU0 Pipe Active */
#define bdw__l3_4__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_4 :: VS FPU1 Pipe Active */
#define bdw__l3_4__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_4 :: VS Send Pipe Active */
#define bdw__l3_4__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_4 :: PS FPU0 Pipe Active */
#define bdw__l3_4__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_4 :: PS FPU1 Pipe Active */
#define bdw__l3_4__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_4 :: PS Send Pipeline Active */
#define bdw__l3_4__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_4 :: FS Both FPU Active */
#define bdw__l3_4__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_4 :: Rasterized Pixels */
#define bdw__l3_4__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_4 :: Early Hi-Depth Test Fails */
#define bdw__l3_4__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_4 :: Early Depth Test Fails */
#define bdw__l3_4__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_4 :: Samples Killed in FS */
#define bdw__l3_4__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_4 :: Pixels Failing Tests */
#define bdw__l3_4__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_4 :: Samples Written */
#define bdw__l3_4__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_4 :: Samples Blended */
#define bdw__l3_4__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_4 :: Sampler Texels */
#define bdw__l3_4__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_4 :: Sampler Texels Misses */
#define bdw__l3_4__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_4 :: SLM Bytes Read */
#define bdw__l3_4__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_4 :: SLM Bytes Written */
#define bdw__l3_4__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_4 :: Shader Memory Accesses */
#define bdw__l3_4__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_4 :: Shader Atomic Memory Accesses */
#define bdw__l3_4__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_4 :: L3 Shader Throughput */
#define bdw__l3_4__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_4 :: Shader Barrier Messages */
#define bdw__l3_4__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_4 :: Slice0 L3 Bank2 Stalled */
#define bdw__l3_4__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_4 :: Slice1 L3 Bank2 Stalled */
#define bdw__l3_4__l31_bank2_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_4 :: Slice1 L3 Bank2 Active */
#define bdw__l3_4__l31_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_4 :: Slice0 L3 Bank2 Active */
#define bdw__l3_4__l30_bank2_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_4 :: SQ is full */
#define bdw__l3_4__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set RasterizerAndPixelBackend -- reader aliases.
 *
 * Each macro expands to exactly one identifier that carries another
 * metric set's prefix (bdw__render_basic__, bdw__render_pipe_profile__,
 * bdw__hdc_and_sf__ or hsw__render_basic__).
 *
 * NOTE(review): some targets are named after a different metric than the
 * alias (e.g. gs_threads -> hsw ... vs_threads); presumably the generator
 * shares readers whose equations are identical -- confirm against the
 * generator before touching these by hand.
 */

/* RasterizerAndPixelBackend :: GPU Time Elapsed */
#define bdw__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* RasterizerAndPixelBackend :: GPU Core Clocks */
#define bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* RasterizerAndPixelBackend :: AVG GPU Core Frequency (read) */
#define bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* RasterizerAndPixelBackend :: AVG GPU Core Frequency (max) */
#define bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* RasterizerAndPixelBackend :: GPU Busy */
#define bdw__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* RasterizerAndPixelBackend :: VS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend :: HS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* RasterizerAndPixelBackend :: DS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* RasterizerAndPixelBackend :: GS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend :: FS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* RasterizerAndPixelBackend :: CS Threads Dispatched */
#define bdw__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* RasterizerAndPixelBackend :: EU Active */
#define bdw__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* RasterizerAndPixelBackend :: EU Stall */
#define bdw__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define bdw__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define bdw__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define bdw__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* RasterizerAndPixelBackend :: VS Send Pipe Active */
#define bdw__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define bdw__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define bdw__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define bdw__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* RasterizerAndPixelBackend :: FS Both FPU Active */
#define bdw__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* RasterizerAndPixelBackend :: Rasterized Pixels */
#define bdw__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define bdw__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* RasterizerAndPixelBackend :: Early Depth Test Fails */
#define bdw__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend :: Samples Killed in FS */
#define bdw__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* RasterizerAndPixelBackend :: Pixels Failing Tests */
#define bdw__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* RasterizerAndPixelBackend :: Samples Written */
#define bdw__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* RasterizerAndPixelBackend :: Samples Blended */
#define bdw__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* RasterizerAndPixelBackend :: Sampler Texels */
#define bdw__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* RasterizerAndPixelBackend :: Sampler Texels Misses */
#define bdw__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* RasterizerAndPixelBackend :: SLM Bytes Read */
#define bdw__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* RasterizerAndPixelBackend :: SLM Bytes Written */
#define bdw__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* RasterizerAndPixelBackend :: Shader Memory Accesses */
#define bdw__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define bdw__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* RasterizerAndPixelBackend :: L3 Shader Throughput */
#define bdw__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* RasterizerAndPixelBackend :: Shader Barrier Messages */
#define bdw__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define bdw__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_basic__sampler0_busy__read

/* RasterizerAndPixelBackend :: Slice1 Post-EarlyZ Pixel Data Ready */
#define bdw__rasterizer_and_pixel_backend__pixel_data1_ready__read bdw__render_basic__sampler1_busy__read

/* RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define bdw__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_bottleneck__read

/* RasterizerAndPixelBackend :: Slice1 Rasterizer Input Available */
#define bdw__rasterizer_and_pixel_backend__rasterizer1_input_available__read bdw__render_basic__sampler1_bottleneck__read

/* RasterizerAndPixelBackend :: Slice1 Rasterizer Output Ready */
#define bdw__rasterizer_and_pixel_backend__rasterizer1_output_ready__read bdw__render_pipe_profile__so_bottleneck__read

/* RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define bdw__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_pipe_profile__cl_bottleneck__read

/* RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define bdw__rasterizer_and_pixel_backend__pixel_values0_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define bdw__rasterizer_and_pixel_backend__ps_output0_available__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* RasterizerAndPixelBackend :: Slice1 Pixel Values Ready */
#define bdw__rasterizer_and_pixel_backend__pixel_values1_ready__read bdw__render_pipe_profile__sf_stall__read

/* RasterizerAndPixelBackend :: Slice1 PS Output Available */
#define bdw__rasterizer_and_pixel_backend__ps_output1_available__read bdw__render_pipe_profile__cl_stall__read

/* RasterizerAndPixelBackend :: SQ is full */
#define bdw__rasterizer_and_pixel_backend__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set Sampler_1 -- reader aliases.
 *
 * Each macro expands to exactly one identifier that carries another
 * metric set's prefix (bdw__render_basic__, bdw__render_pipe_profile__,
 * bdw__hdc_and_sf__ or hsw__render_basic__).
 *
 * NOTE(review): some targets are named after a different metric than the
 * alias (e.g. gs_threads -> hsw ... vs_threads); presumably the generator
 * shares readers whose equations are identical -- confirm against the
 * generator before touching these by hand.
 */

/* Sampler_1 :: GPU Time Elapsed */
#define bdw__sampler_1__gpu_time__read hsw__render_basic__gpu_time__read

/* Sampler_1 :: GPU Core Clocks */
#define bdw__sampler_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Sampler_1 :: AVG GPU Core Frequency (read) */
#define bdw__sampler_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_1 :: AVG GPU Core Frequency (max) */
#define bdw__sampler_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_1 :: GPU Busy */
#define bdw__sampler_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Sampler_1 :: VS Threads Dispatched */
#define bdw__sampler_1__vs_threads__read bdw__render_basic__vs_threads__read

/* Sampler_1 :: HS Threads Dispatched */
#define bdw__sampler_1__hs_threads__read bdw__render_basic__hs_threads__read

/* Sampler_1 :: DS Threads Dispatched */
#define bdw__sampler_1__ds_threads__read bdw__render_basic__ds_threads__read

/* Sampler_1 :: GS Threads Dispatched */
#define bdw__sampler_1__gs_threads__read hsw__render_basic__vs_threads__read

/* Sampler_1 :: FS Threads Dispatched */
#define bdw__sampler_1__ps_threads__read bdw__render_basic__ps_threads__read

/* Sampler_1 :: CS Threads Dispatched */
#define bdw__sampler_1__cs_threads__read bdw__render_basic__cs_threads__read

/* Sampler_1 :: EU Active */
#define bdw__sampler_1__eu_active__read bdw__render_basic__eu_active__read

/* Sampler_1 :: EU Stall */
#define bdw__sampler_1__eu_stall__read bdw__render_basic__eu_stall__read

/* Sampler_1 :: EU Both FPU Pipes Active */
#define bdw__sampler_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Sampler_1 :: VS FPU0 Pipe Active */
#define bdw__sampler_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Sampler_1 :: VS FPU1 Pipe Active */
#define bdw__sampler_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Sampler_1 :: VS Send Pipe Active */
#define bdw__sampler_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Sampler_1 :: PS FPU0 Pipe Active */
#define bdw__sampler_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Sampler_1 :: PS FPU1 Pipe Active */
#define bdw__sampler_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Sampler_1 :: PS Send Pipeline Active */
#define bdw__sampler_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Sampler_1 :: FS Both FPU Active */
#define bdw__sampler_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler_1 :: Rasterized Pixels */
#define bdw__sampler_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Sampler_1 :: Early Hi-Depth Test Fails */
#define bdw__sampler_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Sampler_1 :: Early Depth Test Fails */
#define bdw__sampler_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Sampler_1 :: Samples Killed in FS */
#define bdw__sampler_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Sampler_1 :: Pixels Failing Tests */
#define bdw__sampler_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Sampler_1 :: Samples Written */
#define bdw__sampler_1__samples_written__read bdw__render_basic__samples_written__read

/* Sampler_1 :: Samples Blended */
#define bdw__sampler_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler_1 :: Sampler Texels */
#define bdw__sampler_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler_1 :: Sampler Texels Misses */
#define bdw__sampler_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler_1 :: SLM Bytes Read */
#define bdw__sampler_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Sampler_1 :: SLM Bytes Written */
#define bdw__sampler_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Sampler_1 :: Shader Memory Accesses */
#define bdw__sampler_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Sampler_1 :: Shader Atomic Memory Accesses */
#define bdw__sampler_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Sampler_1 :: L3 Shader Throughput */
#define bdw__sampler_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Sampler_1 :: Shader Barrier Messages */
#define bdw__sampler_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Sampler_1 :: Slice1 Subslice1 Input Available */
#define bdw__sampler_1__sampler11_input_available__read bdw__render_basic__sampler0_busy__read

/* Sampler_1 :: Slice1 Subslice2 Input Available */
#define bdw__sampler_1__sampler12_input_available__read bdw__render_basic__sampler1_busy__read

/* Sampler_1 :: Slice1 Subslice0 Input Available */
#define bdw__sampler_1__sampler10_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler_1 :: Slice1 Subslice2 Sampler Output Ready */
#define bdw__sampler_1__sampler12_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Sampler_1 :: Slice1 Subslice0 Sampler Output Ready */
#define bdw__sampler_1__sampler10_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Sampler_1 :: Slice1 Subslice1 Sampler Output Ready */
#define bdw__sampler_1__sampler11_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* Sampler_1 :: SQ is full */
#define bdw__sampler_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set Sampler_2 -- reader aliases.
 *
 * Each macro expands to exactly one identifier that carries another
 * metric set's prefix (bdw__render_basic__, bdw__render_pipe_profile__,
 * bdw__hdc_and_sf__ or hsw__render_basic__).
 *
 * NOTE(review): some targets are named after a different metric than the
 * alias (e.g. gs_threads -> hsw ... vs_threads); presumably the generator
 * shares readers whose equations are identical -- confirm against the
 * generator before touching these by hand.
 */

/* Sampler_2 :: GPU Time Elapsed */
#define bdw__sampler_2__gpu_time__read hsw__render_basic__gpu_time__read

/* Sampler_2 :: GPU Core Clocks */
#define bdw__sampler_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Sampler_2 :: AVG GPU Core Frequency (read) */
#define bdw__sampler_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_2 :: AVG GPU Core Frequency (max) */
#define bdw__sampler_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_2 :: GPU Busy */
#define bdw__sampler_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Sampler_2 :: VS Threads Dispatched */
#define bdw__sampler_2__vs_threads__read bdw__render_basic__vs_threads__read

/* Sampler_2 :: HS Threads Dispatched */
#define bdw__sampler_2__hs_threads__read bdw__render_basic__hs_threads__read

/* Sampler_2 :: DS Threads Dispatched */
#define bdw__sampler_2__ds_threads__read bdw__render_basic__ds_threads__read

/* Sampler_2 :: GS Threads Dispatched */
#define bdw__sampler_2__gs_threads__read hsw__render_basic__vs_threads__read

/* Sampler_2 :: FS Threads Dispatched */
#define bdw__sampler_2__ps_threads__read bdw__render_basic__ps_threads__read

/* Sampler_2 :: CS Threads Dispatched */
#define bdw__sampler_2__cs_threads__read bdw__render_basic__cs_threads__read

/* Sampler_2 :: EU Active */
#define bdw__sampler_2__eu_active__read bdw__render_basic__eu_active__read

/* Sampler_2 :: EU Stall */
#define bdw__sampler_2__eu_stall__read bdw__render_basic__eu_stall__read

/* Sampler_2 :: EU Both FPU Pipes Active */
#define bdw__sampler_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Sampler_2 :: VS FPU0 Pipe Active */
#define bdw__sampler_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Sampler_2 :: VS FPU1 Pipe Active */
#define bdw__sampler_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Sampler_2 :: VS Send Pipe Active */
#define bdw__sampler_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Sampler_2 :: PS FPU0 Pipe Active */
#define bdw__sampler_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Sampler_2 :: PS FPU1 Pipe Active */
#define bdw__sampler_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Sampler_2 :: PS Send Pipeline Active */
#define bdw__sampler_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Sampler_2 :: FS Both FPU Active */
#define bdw__sampler_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler_2 :: Rasterized Pixels */
#define bdw__sampler_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Sampler_2 :: Early Hi-Depth Test Fails */
#define bdw__sampler_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Sampler_2 :: Early Depth Test Fails */
#define bdw__sampler_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Sampler_2 :: Samples Killed in FS */
#define bdw__sampler_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Sampler_2 :: Pixels Failing Tests */
#define bdw__sampler_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Sampler_2 :: Samples Written */
#define bdw__sampler_2__samples_written__read bdw__render_basic__samples_written__read

/* Sampler_2 :: Samples Blended */
#define bdw__sampler_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler_2 :: Sampler Texels */
#define bdw__sampler_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler_2 :: Sampler Texels Misses */
#define bdw__sampler_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler_2 :: SLM Bytes Read */
#define bdw__sampler_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Sampler_2 :: SLM Bytes Written */
#define bdw__sampler_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Sampler_2 :: Shader Memory Accesses */
#define bdw__sampler_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Sampler_2 :: Shader Atomic Memory Accesses */
#define bdw__sampler_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Sampler_2 :: L3 Shader Throughput */
#define bdw__sampler_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Sampler_2 :: Shader Barrier Messages */
#define bdw__sampler_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Sampler_2 :: Slice0 Subslice1 Input Available */
#define bdw__sampler_2__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Sampler_2 :: Slice0 Subslice2 Input Available */
#define bdw__sampler_2__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Sampler_2 :: Slice0 Subslice0 Input Available */
#define bdw__sampler_2__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler_2 :: Slice0 Subslice2 Sampler Output Ready */
#define bdw__sampler_2__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Sampler_2 :: Slice0 Subslice0 Sampler Output Ready */
#define bdw__sampler_2__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Sampler_2 :: Slice0 Subslice1 Sampler Output Ready */
#define bdw__sampler_2__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* Sampler_2 :: SQ is full */
#define bdw__sampler_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set TDL_1 -- reader aliases.
 *
 * Each macro expands to exactly one identifier that carries another
 * metric set's prefix (bdw__render_basic__, bdw__render_pipe_profile__,
 * bdw__hdc_and_sf__ or hsw__render_basic__).
 *
 * NOTE(review): some targets are named after a different metric than the
 * alias (e.g. gs_threads -> hsw ... vs_threads); presumably the generator
 * shares readers whose equations are identical -- confirm against the
 * generator before touching these by hand.
 */

/* TDL_1 :: GPU Time Elapsed */
#define bdw__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_1 :: GPU Core Clocks */
#define bdw__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_1 :: AVG GPU Core Frequency (read) */
#define bdw__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1 :: AVG GPU Core Frequency (max) */
#define bdw__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1 :: GPU Busy */
#define bdw__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_1 :: VS Threads Dispatched */
#define bdw__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_1 :: HS Threads Dispatched */
#define bdw__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_1 :: DS Threads Dispatched */
#define bdw__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_1 :: GS Threads Dispatched */
#define bdw__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_1 :: FS Threads Dispatched */
#define bdw__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_1 :: CS Threads Dispatched */
#define bdw__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_1 :: EU Active */
#define bdw__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* TDL_1 :: EU Stall */
#define bdw__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_1 :: EU Both FPU Pipes Active */
#define bdw__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_1 :: VS FPU0 Pipe Active */
#define bdw__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_1 :: VS FPU1 Pipe Active */
#define bdw__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_1 :: VS Send Pipe Active */
#define bdw__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_1 :: PS FPU0 Pipe Active */
#define bdw__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_1 :: PS FPU1 Pipe Active */
#define bdw__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_1 :: PS Send Pipeline Active */
#define bdw__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_1 :: FS Both FPU Active */
#define bdw__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_1 :: Rasterized Pixels */
#define bdw__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_1 :: Early Hi-Depth Test Fails */
#define bdw__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_1 :: Early Depth Test Fails */
#define bdw__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_1 :: Samples Killed in FS */
#define bdw__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_1 :: Pixels Failing Tests */
#define bdw__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_1 :: Samples Written */
#define bdw__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* TDL_1 :: Samples Blended */
#define bdw__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_1 :: Sampler Texels */
#define bdw__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_1 :: Sampler Texels Misses */
#define bdw__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* TDL_1 :: SLM Bytes Read */
#define bdw__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* TDL_1 :: SLM Bytes Written */
#define bdw__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* TDL_1 :: Shader Memory Accesses */
#define bdw__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* TDL_1 :: Shader Atomic Memory Accesses */
#define bdw__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* TDL_1 :: L3 Shader Throughput */
#define bdw__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* TDL_1 :: Shader Barrier Messages */
#define bdw__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice1 */
#define bdw__tdl_1__non_ps_thread11_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice0 */
#define bdw__tdl_1__ps_thread10_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice0 */
#define bdw__tdl_1__non_ps_thread10_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice2 */
#define bdw__tdl_1__ps_thread12_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice2 */
#define bdw__tdl_1__non_ps_thread12_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice1 */
#define bdw__tdl_1__ps_thread11_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define bdw__tdl_1__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define bdw__tdl_1__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define bdw__tdl_1__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define bdw__tdl_1__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define bdw__tdl_1__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define bdw__tdl_1__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* TDL_1 :: SQ is full */
#define bdw__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set TDL_2 -- reader aliases (GPU Time Elapsed through Sampler
 * Texels Misses).
 *
 * Each macro expands to exactly one identifier that carries another
 * metric set's prefix (bdw__render_basic__ or hsw__render_basic__).
 *
 * NOTE(review): gs_threads points at hsw ... vs_threads rather than a
 * same-named target; presumably the generator shares readers whose
 * equations are identical -- confirm against the generator before
 * touching these by hand.
 */

/* TDL_2 :: GPU Time Elapsed */
#define bdw__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_2 :: GPU Core Clocks */
#define bdw__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_2 :: AVG GPU Core Frequency (read) */
#define bdw__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_2 :: AVG GPU Core Frequency (max) */
#define bdw__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_2 :: GPU Busy */
#define bdw__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_2 :: VS Threads Dispatched */
#define bdw__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_2 :: HS Threads Dispatched */
#define bdw__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_2 :: DS Threads Dispatched */
#define bdw__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_2 :: GS Threads Dispatched */
#define bdw__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_2 :: FS Threads Dispatched */
#define bdw__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_2 :: CS Threads Dispatched */
#define bdw__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_2 :: EU Active */
#define bdw__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* TDL_2 :: EU Stall */
#define bdw__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_2 :: EU Both FPU Pipes Active */
#define bdw__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_2 :: VS FPU0 Pipe Active */
#define bdw__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_2 :: VS FPU1 Pipe Active */
#define bdw__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_2 :: VS Send Pipe Active */
#define bdw__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_2 :: PS FPU0 Pipe Active */
#define bdw__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_2 :: PS FPU1 Pipe Active */
#define bdw__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_2 :: PS Send Pipeline Active */
#define bdw__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_2 :: FS Both FPU Active */
#define bdw__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_2 :: Rasterized Pixels */
#define bdw__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_2 :: Early Hi-Depth Test Fails */
#define bdw__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_2 :: Early Depth Test Fails */
#define bdw__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_2 :: Samples Killed in FS */
#define bdw__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_2 :: Pixels Failing Tests */
#define bdw__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_2 :: Samples Written */
#define bdw__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* TDL_2 :: Samples Blended */
#define bdw__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_2 :: Sampler Texels */
#define bdw__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_2 :: Sampler Texels Misses */
#define bdw__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define bdw__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define bdw__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define bdw__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define bdw__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define bdw__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define bdw__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/*
 * Metric set TDL_2 -- reader aliases for the thread-dispatch, thread-header
 * and request-queue counters.
 *
 * Pure name aliases; the targets are readers named after counters of other
 * metric sets.
 * NOTE(review): presumably each target evaluates the same equation as the
 * TDL_2 counter it stands in for -- confirm against gen_perf.py.
 */

/* NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define bdw__tdl_2__non_ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read
/* PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define bdw__tdl_2__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read
/* PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define bdw__tdl_2__ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read
/* PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define bdw__tdl_2__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read
/* NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define bdw__tdl_2__non_ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read
/* NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define bdw__tdl_2__non_ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read
/* Thread Header Ready on Slice1 Subslice2 Port 0 */
#define bdw__tdl_2__thread_header12_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read
/* Thread Header Ready on Slice1 Subslice2 Port 1 */
#define bdw__tdl_2__thread_header12_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read
/* Thread Header Ready on Slice1 Subslice1 Port 1 */
#define bdw__tdl_2__thread_header11_ready_port1__read bdw__render_pipe_profile__sf_stall__read
/* Thread Header Ready on Slice1 Subslice0 Port 0 */
#define bdw__tdl_2__thread_header10_ready_port0__read bdw__render_pipe_profile__cl_stall__read
/* Thread Header Ready on Slice1 Subslice0 Port 1 */
#define bdw__tdl_2__thread_header10_ready_port1__read bdw__render_pipe_profile__so_stall__read
/* Thread Header Ready on Slice1 Subslice1 Port 0 */
#define bdw__tdl_2__thread_header11_ready_port0__read bdw__render_pipe_profile__ds_stall__read
/* SQ is full */
#define bdw__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Compute Metrics Extra set -- reader aliases.
 *
 * Each macro only gives an existing reader a compute_extra name.
 * NOTE(review): fpu1_active points at a reader named eu_stall -- presumably
 * shared because the equations match; confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define bdw__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define bdw__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define bdw__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define bdw__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* EU FPU1 Pipe Active */
#define bdw__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* Compute Metrics Extra set :: EU FPU1 Pipe Active including Ext Math
 *
 * RPN source: A 8 READ  C 5 READ C 6 READ FADD C 7 READ FADD C 2 READ FADD
 *             C 3 READ FADD C 4 READ FADD 8 FMUL FADD 100 FMUL
 *             $EuCoresTotalCount FDIV $GpuCoreClocks FDIV
 *
 * In infix form:
 *    (A8 + (C5 + C6 + C7 + C2 + C3 + C4) * 8) * 100 / n_eus / GpuCoreClocks
 * where An / Cn are the accumulator entries at a_offset + n / c_offset + n.
 * A division whose divisor is zero contributes 0 instead of dividing.
 */
static float
bdw__compute_extra__fpu1_active_adjusted__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   /* Raw accumulator values used by the equation. */
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   /* C5 + C6 is summed as integers before widening; the remaining terms
    * are added in double, in the order the equation lists them.
    */
   double c_total = c5 + c6;
   c_total = c_total + c7;
   c_total = c_total + c2;
   c_total = c_total + c3;
   c_total = c_total + c4;

   const double c_scaled = c_total * 8;
   const double combined = a8 + c_scaled;
   const double combined_x100 = combined * 100;

   /* Divide by the EU count, guarding against a zero count. */
   const double eu_count = perf->sys_vars.n_eus;
   double per_eu = 0;
   if (eu_count) {
      per_eu = combined_x100 / eu_count;
   }

   /* Divide by the core clocks, guarding against a zero clock count. */
   const double clocks = bdw__compute_extra__gpu_core_clocks__read(perf, query, results);
   double value = 0;
   if (clocks) {
      value = per_eu / clocks;
   }

   return value;
}

/*
 * Media Vme Pipe metric set -- reader aliases.
 *
 * Each macro only gives an existing reader a vme_pipe name; nothing is
 * computed here.
 */

/* GPU Time Elapsed */
#define bdw__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define bdw__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define bdw__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define bdw__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define bdw__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read
/* CS Threads Dispatched */
#define bdw__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read
/* EU Active */
#define bdw__vme_pipe__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define bdw__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define bdw__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Media Vme Pipe metric set :: EU Thread Occupancy
 *
 * RPN source: 8 A 10 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV
 *             100 UMUL $GpuCoreClocks FDIV
 *
 * Steps: 8 * A10 divided by eu_threads_count in floating point, truncated
 * to an integer, divided by n_eus and multiplied by 100 in integer
 * arithmetic, and finally divided by GpuCoreClocks in floating point.
 * A division whose divisor is zero contributes 0 instead of dividing.
 */
static float
bdw__vme_pipe__eu_thread_occupancy__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   const uint64_t a10 = results->accumulator[query->a_offset + 10];

   /* The product is formed on integers and only then widened to double. */
   const double scaled = 8 * a10;
   const double eu_threads = perf->sys_vars.eu_threads_count;
   double per_thread = 0;
   if (eu_threads) {
      per_thread = scaled / eu_threads;
   }

   /* Integer stage: the fractional part is dropped on this conversion. */
   const uint64_t per_thread_int = per_thread;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;
   if (eu_count) {
      per_eu = per_thread_int / eu_count;
   }
   const uint64_t per_eu_x100 = per_eu * 100;

   /* Back to floating point for the final division by the core clocks. */
   const double numerator = per_eu_x100;
   const double clocks = bdw__vme_pipe__gpu_core_clocks__read(perf, query, results);
   double occupancy = 0;
   if (clocks) {
      occupancy = numerator / clocks;
   }

   return occupancy;
}

/* Media Vme Pipe metric set :: VME Busy
 *
 * RPN source: B 0 READ B 3 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV
 *
 * Steps: (B0 + B3) / 2 in floating point, times 100 truncated to an
 * integer, then divided by GpuCoreClocks in floating point. The result is
 * 0 when GpuCoreClocks reads as zero.
 */
static float
bdw__vme_pipe__vme_busy__read(UNUSED struct intel_perf_config *perf,
                              const struct intel_perf_query_info *query,
                              const struct intel_perf_query_result *results)
{
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   /* Integer sum, widened to double afterwards. */
   const double total = b0 + b3;

   /* The divisor is the constant 2, so no zero guard is needed here. */
   const double mean = total / 2;

   /* UMUL stage: the scaled value is truncated to an integer. */
   const uint64_t mean_x100 = mean * 100;
   const double numerator = mean_x100;

   const double clocks = bdw__vme_pipe__gpu_core_clocks__read(perf, query, results);
   double busy = 0;
   if (clocks) {
      busy = numerator / clocks;
   }

   return busy;
}

/*
 * Gpu Rings Busyness metric set -- reader aliases.
 *
 * Each macro only gives an existing reader a gpu_busyness name.
 * NOTE(review): the ring-busy counters point at readers named after
 * render_pipe_profile / render_basic counters -- presumably shared because
 * the equations match; confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define bdw__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define bdw__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define bdw__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define bdw__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* Render Ring Busy */
#define bdw__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read
/* Vdbox0 Ring Busy */
#define bdw__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read
/* Vdbox1 Ring Busy */
#define bdw__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__so_stall__read
/* Vebox Ring Busy */
#define bdw__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__sf_stall__read
/* Blitter Ring Busy */
#define bdw__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read
/* AnyRingBusy */
#define bdw__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/*
 * MDAPI testing set -- reader aliases.
 *
 * Each macro only gives an existing reader a test_oa name.
 * NOTE(review): TestCounter0..8 point at hsw compute_extended / memory_reads
 * readers -- presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define bdw__test_oa__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define bdw__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define bdw__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define bdw__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* TestCounter0 */
#define bdw__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read
/* TestCounter1 */
#define bdw__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read
/* TestCounter2 */
#define bdw__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read
/* TestCounter3 */
#define bdw__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read
/* TestCounter4 */
#define bdw__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read
/* TestCounter5 */
#define bdw__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read
/* TestCounter6 */
#define bdw__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read
/* TestCounter7 */
#define bdw__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read
/* TestCounter8 */
#define bdw__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/*
 * Metric set PMA Stall -- reader aliases.
 *
 * Each macro only gives an existing reader a stc__pma_stall name.
 * NOTE(review): gs_threads and stc_pma_stall point at readers named after a
 * different counter -- presumably shared because the equations match;
 * confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define bdw__stc__pma_stall__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define bdw__stc__pma_stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define bdw__stc__pma_stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define bdw__stc__pma_stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define bdw__stc__pma_stall__gpu_busy__read bdw__render_basic__gpu_busy__read
/* VS Threads Dispatched */
#define bdw__stc__pma_stall__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define bdw__stc__pma_stall__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define bdw__stc__pma_stall__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define bdw__stc__pma_stall__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define bdw__stc__pma_stall__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define bdw__stc__pma_stall__cs_threads__read bdw__render_basic__cs_threads__read
/* STC PMA stall */
#define bdw__stc__pma_stall__stc_pma_stall__read bdw__render_basic__sampler0_busy__read

/*
 * Render Metrics Basic set (chv) -- reader aliases for the timing, thread,
 * EU and per-sampler busy counters.
 *
 * Each macro only gives an existing bdw/hsw reader a chv name; nothing is
 * computed here.
 * NOTE(review): gs_threads points at the hsw vs_threads reader --
 * presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define chv__render_basic__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define chv__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define chv__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define chv__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* VS Threads Dispatched */
#define chv__render_basic__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define chv__render_basic__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define chv__render_basic__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define chv__render_basic__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define chv__render_basic__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define chv__render_basic__cs_threads__read bdw__render_basic__cs_threads__read
/* GPU Busy */
#define chv__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read
/* EU Active */
#define chv__render_basic__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define chv__render_basic__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define chv__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* VS FPU0 Pipe Active */
#define chv__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read
/* VS FPU1 Pipe Active */
#define chv__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read
/* VS Send Pipe Active */
#define chv__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read
/* PS FPU0 Pipe Active */
#define chv__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read
/* PS FPU1 Pipe Active */
#define chv__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read
/* PS Send Pipeline Active */
#define chv__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read
/* FS Both FPU Active */
#define chv__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read
/* Sampler 0 Busy */
#define chv__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read
/* Sampler 1 Busy */
#define chv__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy
 *
 * RPN source: B 0 READ  B 1 READ UADD $GpuCoreClocks FDIV 2 FDIV 100 FMUL
 *
 * In infix form: (B0 + B1) / GpuCoreClocks / 2 * 100, with the sum taken on
 * integers and everything after it in floating point. The result is 0 when
 * GpuCoreClocks reads as zero.
 */
static float
chv__render_basic__samplers_busy__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];

   /* UADD stage: integer sum, widened to double afterwards. */
   const uint64_t busy_sum = b0 + b1;
   const double busy = busy_sum;

   const double clocks = chv__render_basic__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;
   if (clocks) {
      ratio = busy / clocks;
   }

   /* The divisor is the constant 2, so no zero guard is needed here. */
   const double halved = ratio / 2;
   const double percent = halved * 100;

   return percent;
}

/*
 * Render Metrics Basic set (chv) -- reader aliases for the sampler, pixel,
 * SLM, shader-memory and L3-miss counters.
 *
 * Each macro only gives an existing bdw/hsw reader a chv name.
 * NOTE(review): l3_misses points at the hsw compute_extended typed_atomics0
 * reader -- presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* Sampler 0 Bottleneck */
#define chv__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read
/* Sampler 1 Bottleneck */
#define chv__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read
/* Samples Written */
#define chv__render_basic__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define chv__render_basic__samples_blended__read bdw__render_basic__samples_blended__read
/* Sampler Texels */
#define chv__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read
/* Sampler Texels Misses */
#define chv__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read
/* Rasterized Pixels */
#define chv__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define chv__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define chv__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define chv__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define chv__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* SLM Bytes Read */
#define chv__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read
/* SLM Bytes Written */
#define chv__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read
/* Shader Memory Accesses */
#define chv__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read
/* Shader Atomic Memory Accesses */
#define chv__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read
/* L3 Misses */
#define chv__render_basic__l3_misses__read hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput
 *
 * RPN source: A 29 READ 64 UMUL
 *
 * Returns the accumulator entry at a_offset + 29 multiplied by 64, in
 * unsigned 64-bit arithmetic.
 */
static uint64_t
chv__render_basic__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   const uint64_t a29 = results->accumulator[query->a_offset + 29];

   return a29 * 64;
}

/*
 * Render Metrics Basic set (chv) -- reader aliases for the L3 shader,
 * barrier, GTI throughput and sampler-bottleneck counters.
 *
 * Each macro only gives an existing bdw/hsw reader a chv name.
 * NOTE(review): shader_barriers points at the hsw early_depth_test_fails
 * reader -- presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* L3 Shader Throughput */
#define chv__render_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read
/* Shader Barrier Messages */
#define chv__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read
/* GTI Fixed Pipe Throughput */
#define chv__render_basic__gti_vf_throughput__read bdw__render_basic__gti_vf_throughput__read
/* GTI Depth Throughput */
#define chv__render_basic__gti_depth_throughput__read bdw__render_basic__gti_depth_throughput__read
/* GTI RCC Throughput */
#define chv__render_basic__gti_rcc_throughput__read bdw__render_basic__gti_rcc_throughput__read
/* GTI L3 Throughput */
#define chv__render_basic__gti_l3_throughput__read hsw__render_basic__gti_l3_throughput__read
/* GTI HDC TLB Lookup Throughput */
#define chv__render_basic__gti_hdc_lookups_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read
/* GTI Read Throughput */
#define chv__render_basic__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define chv__render_basic__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read
/* Samplers Bottleneck */
#define chv__render_basic__sampler_bottleneck__read bdw__render_basic__sampler_bottleneck__read

/*
 * Compute Metrics Basic set (chv) -- reader aliases for the timing, thread,
 * EU, pixel, SLM and shader-memory counters.
 *
 * Each macro only gives an existing bdw/hsw reader a chv name; nothing is
 * computed here.
 * NOTE(review): several targets are named after a different counter
 * (gs_threads, fpu0_active, fpu1_active, eu_send_active, shader_barriers)
 * -- presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define chv__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define chv__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (value) */
#define chv__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion) */
#define chv__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define chv__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read
/* VS Threads Dispatched */
#define chv__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define chv__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define chv__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define chv__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define chv__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define chv__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read
/* EU Active */
#define chv__compute_basic__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define chv__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define chv__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* EU FPU0 Pipe Active */
#define chv__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read
/* EU FPU1 Pipe Active */
#define chv__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read
/* EU AVG IPC Rate */
#define chv__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read
/* EU Send Pipe Active */
#define chv__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read
/* EU Thread Occupancy */
#define chv__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read
/* Rasterized Pixels */
#define chv__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define chv__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define chv__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define chv__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define chv__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define chv__compute_basic__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define chv__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read
/* Sampler Texels */
#define chv__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read
/* Sampler Texels Misses */
#define chv__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read
/* SLM Bytes Read */
#define chv__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read
/* SLM Bytes Written */
#define chv__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read
/* Shader Memory Accesses */
#define chv__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read
/* Shader Atomic Memory Accesses */
#define chv__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read
/* L3 Shader Throughput */
#define chv__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read
/* Shader Barrier Messages */
#define chv__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read
 *
 * RPN source: B 0 READ B 1 READ UADD 64 UMUL
 *
 * Returns (B0 + B1) * 64 in unsigned 64-bit arithmetic, where Bn is the
 * accumulator entry at b_offset + n.
 */
static uint64_t
chv__compute_basic__typed_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];

   return (b0 + b1) * 64;
}

/* Compute Metrics Basic set :: Typed Bytes Written
 *
 * RPN source: B 2 READ B 3 READ UADD 64 UMUL
 *
 * Returns (B2 + B3) * 64 in unsigned 64-bit arithmetic, where Bn is the
 * accumulator entry at b_offset + n.
 */
static uint64_t
chv__compute_basic__typed_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   return (b2 + b3) * 64;
}

/* Compute Metrics Basic set :: Untyped Bytes Read
 *
 * RPN source: B 4 READ B 5 READ UADD 64 UMUL
 *
 * Returns (B4 + B5) * 64 in unsigned 64-bit arithmetic, where Bn is the
 * accumulator entry at b_offset + n.
 */
static uint64_t
chv__compute_basic__untyped_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];

   return (b4 + b5) * 64;
}

/*
 * Compute Metrics Basic set (chv) -- reader aliases for the untyped-write
 * and GTI read/write throughput counters.
 *
 * NOTE(review): every target here is named after a different hsw counter
 * -- presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* Untyped Writes */
#define chv__compute_basic__untyped_bytes_written__read hsw__compute_basic__typed_bytes_written__read
/* GTI Read Throughput */
#define chv__compute_basic__gti_read_throughput__read hsw__render_basic__gti_depth_throughput__read
/* GTI Write Throughput */
#define chv__compute_basic__gti_write_throughput__read hsw__render_basic__gti_vf_throughput__read

/* Compute Metrics Basic set :: GTI Ring Throughput
 *
 * RPN source: C 2 READ 128 UMUL
 *
 * Returns the accumulator entry at c_offset + 2 multiplied by 128, in
 * unsigned 64-bit arithmetic.
 */
static uint64_t
chv__compute_basic__gti_ring_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   return c2 * 128;
}

/*
 * Compute Metrics Basic set (chv) -- reader aliases for the GTI stall
 * counters.
 *
 * NOTE(review): both targets are bdw render_pipe_profile stall readers --
 * presumably shared because the equations match; confirm against
 * gen_perf.py.
 */

/* GTI Read-Only Stall */
#define chv__compute_basic__gti_ro_stall__read bdw__render_pipe_profile__so_stall__read
/* GTI Read-Write Stall */
#define chv__compute_basic__gti_rw_stall__read bdw__render_pipe_profile__cl_stall__read

/* Render Metrics for 3D Pipeline Profile: chv reader aliases.
 *
 * Every entry in this run is an object-like macro: the
 * chv__render_pipe_profile__ name expands to an hsw__ or bdw__ identifier
 * instead of getting its own function body. avg_gpu_core_frequency has both
 * a __read and a __max alias, and both carry the same label comment.
 * NOTE(review): some targets are named after a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, vs_bottleneck ->
 * bdw__render_basic__sampler0_busy__read). Presumably gen_perf.py reuses any
 * earlier reader with an identical equation regardless of its name; confirm
 * in the generator before treating these as mistakes.
 */

/* Render Metrics for 3D Pipeline Profile :: GPU Time Elapsed */
#define chv__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics for 3D Pipeline Profile :: GPU Core Clocks */
#define chv__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define chv__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define chv__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics for 3D Pipeline Profile :: GPU Busy */
#define chv__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics for 3D Pipeline Profile :: VS Threads Dispatched */
#define chv__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: HS Threads Dispatched */
#define chv__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics for 3D Pipeline Profile :: DS Threads Dispatched */
#define chv__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics for 3D Pipeline Profile :: GS Threads Dispatched */
#define chv__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: FS Threads Dispatched */
#define chv__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics for 3D Pipeline Profile :: CS Threads Dispatched */
#define chv__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics for 3D Pipeline Profile :: EU Active */
#define chv__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics for 3D Pipeline Profile :: EU Stall */
#define chv__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics for 3D Pipeline Profile :: Rasterized Pixels */
#define chv__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define chv__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Early Depth Test Fails */
#define chv__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Samples Killed in FS */
#define chv__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics for 3D Pipeline Profile :: Pixels Failing Tests */
#define chv__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics for 3D Pipeline Profile :: Samples Written */
#define chv__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics for 3D Pipeline Profile :: Samples Blended */
#define chv__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Accesses */
#define chv__render_pipe_profile__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels */
#define chv__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels Misses */
#define chv__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Read */
#define chv__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Written */
#define chv__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics for 3D Pipeline Profile :: Shader Memory Accesses */
#define chv__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define chv__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics for 3D Pipeline Profile :: L3 Shader Throughput */
#define chv__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics for 3D Pipeline Profile :: Shader Barrier Messages */
#define chv__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: VF Bottleneck */
#define chv__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: VS Bottleneck */
#define chv__render_pipe_profile__vs_bottleneck__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics for 3D Pipeline Profile :: HS Bottleneck */
#define chv__render_pipe_profile__hs_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics for 3D Pipeline Profile :: DS Bottleneck */
#define chv__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: GS Bottleneck */
#define chv__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: SO Bottleneck */
#define chv__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: Clipper Bottleneck */
#define chv__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define chv__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define chv__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: Early Depth Bottleneck */
#define chv__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: BC Bottleneck */
#define chv__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: HS Stall */
#define chv__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics for 3D Pipeline Profile :: DS Stall */
#define chv__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics for 3D Pipeline Profile :: SO Stall */
#define chv__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics for 3D Pipeline Profile :: CL Stall */
#define chv__render_pipe_profile__cl_stall__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics for 3D Pipeline Profile :: SF Stall */
#define chv__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set HDCAndSF: chv reader aliases.
 *
 * Every entry in this run is an object-like macro: the chv__hdc_and_sf__
 * name expands to an hsw__ or bdw__ identifier instead of getting its own
 * function body. avg_gpu_core_frequency has both a __read and a __max alias
 * under the same label comment.
 * NOTE(review): some targets are named after a different metric (for example
 * poly_data_ready -> bdw__render_basic__sampler0_busy__read). Presumably
 * gen_perf.py reuses any earlier reader with an identical equation; confirm
 * in the generator before treating these as mistakes.
 * NOTE(review): the labels on the non_sampler_shader12/11/10 entries repeat
 * "(s0.ss2)", "(s0.ss1)", "(s0.ss0)" from the shader02/01/00 entries even
 * though the symbol names differ; check the counter names in the generator
 * input to see whether "s1" was intended.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define chv__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define chv__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define chv__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define chv__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define chv__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define chv__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define chv__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define chv__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define chv__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define chv__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define chv__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define chv__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define chv__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define chv__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define chv__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define chv__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define chv__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define chv__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define chv__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define chv__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define chv__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define chv__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define chv__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define chv__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define chv__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define chv__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define chv__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define chv__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define chv__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define chv__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define chv__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define chv__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define chv__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define chv__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define chv__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define chv__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define chv__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define chv__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define chv__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define chv__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define chv__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define chv__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define chv__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define chv__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_1: chv reader aliases.
 *
 * Every entry in this run is an object-like macro: the chv__l3_1__ name
 * expands to an hsw__ or bdw__ identifier instead of getting its own
 * function body. avg_gpu_core_frequency has both a __read and a __max alias
 * under the same label comment.
 * NOTE(review): some targets are named after a different metric (for example
 * l31_bank0_stalled -> bdw__render_basic__sampler0_busy__read). Presumably
 * gen_perf.py reuses any earlier reader with an identical equation; confirm
 * in the generator before treating these as mistakes.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define chv__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define chv__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define chv__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define chv__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define chv__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define chv__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define chv__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define chv__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define chv__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define chv__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define chv__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define chv__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define chv__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define chv__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define chv__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define chv__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define chv__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define chv__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define chv__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define chv__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define chv__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define chv__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define chv__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define chv__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define chv__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define chv__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define chv__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define chv__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define chv__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define chv__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define chv__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define chv__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define chv__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define chv__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define chv__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define chv__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice1 L3 Bank0 Stalled */
#define chv__l3_1__l31_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice1 L3 Bank1 Stalled */
#define chv__l3_1__l31_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice1 L3 Bank1 Active */
#define chv__l3_1__l31_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice1 L3 Bank0 Active */
#define chv__l3_1__l31_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define chv__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_2: chv reader aliases.
 *
 * Every entry in this run is an object-like macro: the chv__l3_2__ name
 * expands to an hsw__ or bdw__ identifier instead of getting its own
 * function body. avg_gpu_core_frequency has both a __read and a __max alias
 * under the same label comment.
 * NOTE(review): some targets are named after a different metric (for example
 * l30_bank0_stalled -> bdw__render_basic__sampler0_busy__read). Presumably
 * gen_perf.py reuses any earlier reader with an identical equation; confirm
 * in the generator before treating these as mistakes.
 */

/* Metric set L3_2 :: GPU Time Elapsed */
#define chv__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define chv__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency */
#define chv__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency */
#define chv__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define chv__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define chv__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define chv__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define chv__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define chv__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define chv__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define chv__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define chv__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define chv__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define chv__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define chv__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define chv__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define chv__l3_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define chv__l3_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define chv__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define chv__l3_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define chv__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define chv__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define chv__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define chv__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define chv__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define chv__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define chv__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define chv__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define chv__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define chv__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define chv__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define chv__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define chv__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define chv__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define chv__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define chv__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank0 Stalled */
#define chv__l3_2__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank1 Stalled */
#define chv__l3_2__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_2 :: Slice0 L3 Bank1 Active */
#define chv__l3_2__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: Slice0 L3 Bank0 Active */
#define chv__l3_2__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define chv__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* Metric set L3_3: chv reader aliases.
 *
 * Every entry in this run is an object-like macro: the chv__l3_3__ name
 * expands to an hsw__ or bdw__ identifier instead of getting its own
 * function body. avg_gpu_core_frequency has both a __read and a __max alias
 * under the same label comment.
 * NOTE(review): some targets are named after a different metric (for example
 * l30_bank3_stalled -> bdw__render_basic__sampler0_busy__read). Presumably
 * gen_perf.py reuses any earlier reader with an identical equation; confirm
 * in the generator before treating these as mistakes.
 */

/* Metric set L3_3 :: GPU Time Elapsed */
#define chv__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_3 :: GPU Core Clocks */
#define chv__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_3 :: AVG GPU Core Frequency */
#define chv__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_3 :: AVG GPU Core Frequency */
#define chv__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_3 :: GPU Busy */
#define chv__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_3 :: VS Threads Dispatched */
#define chv__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_3 :: HS Threads Dispatched */
#define chv__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_3 :: DS Threads Dispatched */
#define chv__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_3 :: GS Threads Dispatched */
#define chv__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_3 :: FS Threads Dispatched */
#define chv__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_3 :: CS Threads Dispatched */
#define chv__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_3 :: EU Active */
#define chv__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_3 :: EU Stall */
#define chv__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_3 :: EU Both FPU Pipes Active */
#define chv__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_3 :: VS FPU0 Pipe Active */
#define chv__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_3 :: VS FPU1 Pipe Active */
#define chv__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_3 :: VS Send Pipe Active */
#define chv__l3_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_3 :: PS FPU0 Pipe Active */
#define chv__l3_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_3 :: PS FPU1 Pipe Active */
#define chv__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_3 :: PS Send Pipeline Active */
#define chv__l3_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_3 :: FS Both FPU Active */
#define chv__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_3 :: Rasterized Pixels */
#define chv__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_3 :: Early Hi-Depth Test Fails */
#define chv__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_3 :: Early Depth Test Fails */
#define chv__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Samples Killed in FS */
#define chv__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_3 :: Pixels Failing Tests */
#define chv__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_3 :: Samples Written */
#define chv__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_3 :: Samples Blended */
#define chv__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_3 :: Sampler Texels */
#define chv__l3_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_3 :: Sampler Texels Misses */
#define chv__l3_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_3 :: SLM Bytes Read */
#define chv__l3_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_3 :: SLM Bytes Written */
#define chv__l3_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_3 :: Shader Memory Accesses */
#define chv__l3_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_3 :: Shader Atomic Memory Accesses */
#define chv__l3_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_3 :: L3 Shader Throughput */
#define chv__l3_3__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_3 :: Shader Barrier Messages */
#define chv__l3_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled */
#define chv__l3_3__l30_bank3_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_3 :: Slice1 L3 Bank3 Stalled */
#define chv__l3_3__l31_bank3_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_3 :: Slice1 L3 Bank3 Active */
#define chv__l3_3__l31_bank3_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Active */
#define chv__l3_3__l30_bank3_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_3 :: SQ is full */
#define chv__l3_3__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* ---- chv__l3_4__* aliases (metric set L3_4) ----
 *
 * Every macro in this group is an object-like alias: the chv__l3_4__ name
 * expands to the identifier of an existing hsw__ or bdw__ symbol, so these
 * lines define no functions of their own.
 *
 * Several targets are named after a different counter than the alias, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read, and the L3 bank counters
 * -> sampler / pipe-profile names.
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical; confirm in the generator before "correcting" a
 * target by hand (this file is autogenerated).
 */

/* Metric set L3_4 :: GPU Time Elapsed */
#define chv__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_4 :: GPU Core Clocks */
#define chv__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_4 :: AVG GPU Core Frequency */
#define chv__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_4 :: AVG GPU Core Frequency (alias for the __max symbol, not __read) */
#define chv__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_4 :: GPU Busy */
#define chv__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_4 :: VS Threads Dispatched */
#define chv__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_4 :: HS Threads Dispatched */
#define chv__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_4 :: DS Threads Dispatched */
#define chv__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_4 :: GS Threads Dispatched */
#define chv__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_4 :: FS Threads Dispatched */
#define chv__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_4 :: CS Threads Dispatched */
#define chv__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_4 :: EU Active */
#define chv__l3_4__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_4 :: EU Stall */
#define chv__l3_4__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_4 :: EU Both FPU Pipes Active */
#define chv__l3_4__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_4 :: VS FPU0 Pipe Active */
#define chv__l3_4__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_4 :: VS FPU1 Pipe Active */
#define chv__l3_4__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_4 :: VS Send Pipe Active */
#define chv__l3_4__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_4 :: PS FPU0 Pipe Active */
#define chv__l3_4__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_4 :: PS FPU1 Pipe Active */
#define chv__l3_4__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_4 :: PS Send Pipeline Active */
#define chv__l3_4__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_4 :: FS Both FPU Active */
#define chv__l3_4__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_4 :: Rasterized Pixels */
#define chv__l3_4__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_4 :: Early Hi-Depth Test Fails */
#define chv__l3_4__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_4 :: Early Depth Test Fails */
#define chv__l3_4__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_4 :: Samples Killed in FS */
#define chv__l3_4__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_4 :: Pixels Failing Tests */
#define chv__l3_4__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_4 :: Samples Written */
#define chv__l3_4__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_4 :: Samples Blended */
#define chv__l3_4__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_4 :: Sampler Texels */
#define chv__l3_4__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_4 :: Sampler Texels Misses */
#define chv__l3_4__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_4 :: SLM Bytes Read */
#define chv__l3_4__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_4 :: SLM Bytes Written */
#define chv__l3_4__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_4 :: Shader Memory Accesses */
#define chv__l3_4__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_4 :: Shader Atomic Memory Accesses */
#define chv__l3_4__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_4 :: L3 Shader Throughput */
#define chv__l3_4__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_4 :: Shader Barrier Messages */
#define chv__l3_4__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_4 :: Slice0 L3 Bank2 Stalled */
#define chv__l3_4__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_4 :: Slice1 L3 Bank2 Stalled */
#define chv__l3_4__l31_bank2_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_4 :: Slice1 L3 Bank2 Active */
#define chv__l3_4__l31_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_4 :: Slice0 L3 Bank2 Active */
#define chv__l3_4__l30_bank2_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_4 :: SQ is full */
#define chv__l3_4__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* ---- chv__rasterizer_and_pixel_backend__* aliases
 *      (metric set RasterizerAndPixelBackend) ----
 *
 * Every macro in this group is an object-like alias: the chv__ name expands
 * to the identifier of an existing hsw__ or bdw__ symbol, so these lines
 * define no functions of their own.
 *
 * Several targets are named after a different counter than the alias, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read, and the rasterizer /
 * pixel-backend counters -> sampler / pipe-profile names.
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical; confirm in the generator before "correcting" a
 * target by hand (this file is autogenerated).
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define chv__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define chv__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define chv__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (alias for the __max symbol, not __read) */
#define chv__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define chv__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define chv__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define chv__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define chv__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define chv__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define chv__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define chv__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define chv__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define chv__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define chv__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define chv__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define chv__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define chv__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define chv__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define chv__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define chv__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define chv__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define chv__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define chv__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define chv__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define chv__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define chv__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define chv__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define chv__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define chv__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define chv__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define chv__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define chv__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice1 Post-EarlyZ Pixel Data Ready */
#define chv__rasterizer_and_pixel_backend__pixel_data1_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define chv__rasterizer_and_pixel_backend__pixel_data0_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice1 Rasterizer Input Available */
#define chv__rasterizer_and_pixel_backend__rasterizer1_input_available__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice1 Rasterizer Output Ready */
#define chv__rasterizer_and_pixel_backend__rasterizer1_output_ready__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define chv__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define chv__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define chv__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice1 Pixel Values Ready */
#define chv__rasterizer_and_pixel_backend__pixel_values1_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice1 PS Output Available */
#define chv__rasterizer_and_pixel_backend__ps_output1_available__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define chv__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* ---- chv__sampler_1__* aliases (metric set Sampler_1) ----
 *
 * Every macro in this group is an object-like alias: the chv__sampler_1__
 * name expands to the identifier of an existing hsw__ or bdw__ symbol, so
 * these lines define no functions of their own.
 *
 * Several targets are named after a different counter than the alias, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read, and the per-subslice
 * sampler counters -> sampler busy/bottleneck and pipe-profile names.
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical; confirm in the generator before "correcting" a
 * target by hand (this file is autogenerated).
 */

/* Metric set Sampler_1 :: GPU Time Elapsed */
#define chv__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler_1 :: GPU Core Clocks */
#define chv__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler_1 :: AVG GPU Core Frequency */
#define chv__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler_1 :: AVG GPU Core Frequency (alias for the __max symbol, not __read) */
#define chv__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler_1 :: GPU Busy */
#define chv__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler_1 :: VS Threads Dispatched */
#define chv__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler_1 :: HS Threads Dispatched */
#define chv__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler_1 :: DS Threads Dispatched */
#define chv__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler_1 :: GS Threads Dispatched */
#define chv__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler_1 :: FS Threads Dispatched */
#define chv__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler_1 :: CS Threads Dispatched */
#define chv__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler_1 :: EU Active */
#define chv__sampler_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler_1 :: EU Stall */
#define chv__sampler_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler_1 :: EU Both FPU Pipes Active */
#define chv__sampler_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler_1 :: VS FPU0 Pipe Active */
#define chv__sampler_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler_1 :: VS FPU1 Pipe Active */
#define chv__sampler_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler_1 :: VS Send Pipe Active */
#define chv__sampler_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler_1 :: PS FPU0 Pipe Active */
#define chv__sampler_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler_1 :: PS FPU1 Pipe Active */
#define chv__sampler_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler_1 :: PS Send Pipeline Active */
#define chv__sampler_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler_1 :: FS Both FPU Active */
#define chv__sampler_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler_1 :: Rasterized Pixels */
#define chv__sampler_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler_1 :: Early Hi-Depth Test Fails */
#define chv__sampler_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler_1 :: Early Depth Test Fails */
#define chv__sampler_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler_1 :: Samples Killed in FS */
#define chv__sampler_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler_1 :: Pixels Failing Tests */
#define chv__sampler_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler_1 :: Samples Written */
#define chv__sampler_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler_1 :: Samples Blended */
#define chv__sampler_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler_1 :: Sampler Texels */
#define chv__sampler_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler_1 :: Sampler Texels Misses */
#define chv__sampler_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler_1 :: SLM Bytes Read */
#define chv__sampler_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler_1 :: SLM Bytes Written */
#define chv__sampler_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler_1 :: Shader Memory Accesses */
#define chv__sampler_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler_1 :: Shader Atomic Memory Accesses */
#define chv__sampler_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler_1 :: L3 Shader Throughput */
#define chv__sampler_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set Sampler_1 :: Shader Barrier Messages */
#define chv__sampler_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler_1 :: Slice1 Subslice1 Input Available */
#define chv__sampler_1__sampler11_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set Sampler_1 :: Slice1 Subslice2 Input Available */
#define chv__sampler_1__sampler12_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler_1 :: Slice1 Subslice0 Input Available */
#define chv__sampler_1__sampler10_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler_1 :: Slice1 Subslice2 Sampler Output Ready */
#define chv__sampler_1__sampler12_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler_1 :: Slice1 Subslice0 Sampler Output Ready */
#define chv__sampler_1__sampler10_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler_1 :: Slice1 Subslice1 Sampler Output Ready */
#define chv__sampler_1__sampler11_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler_1 :: SQ is full */
#define chv__sampler_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* ---- chv__sampler_2__* aliases (metric set Sampler_2) ----
 *
 * Every macro in this group is an object-like alias: the chv__sampler_2__
 * name expands to the identifier of an existing hsw__ or bdw__ symbol, so
 * these lines define no functions of their own.
 *
 * Several targets are named after a different counter than the alias, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read, and the per-subslice
 * sampler counters -> sampler busy/bottleneck and pipe-profile names.
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical; confirm in the generator before "correcting" a
 * target by hand (this file is autogenerated).
 */

/* Metric set Sampler_2 :: GPU Time Elapsed */
#define chv__sampler_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler_2 :: GPU Core Clocks */
#define chv__sampler_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler_2 :: AVG GPU Core Frequency */
#define chv__sampler_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler_2 :: AVG GPU Core Frequency (alias for the __max symbol, not __read) */
#define chv__sampler_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler_2 :: GPU Busy */
#define chv__sampler_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler_2 :: VS Threads Dispatched */
#define chv__sampler_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler_2 :: HS Threads Dispatched */
#define chv__sampler_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler_2 :: DS Threads Dispatched */
#define chv__sampler_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler_2 :: GS Threads Dispatched */
#define chv__sampler_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler_2 :: FS Threads Dispatched */
#define chv__sampler_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler_2 :: CS Threads Dispatched */
#define chv__sampler_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler_2 :: EU Active */
#define chv__sampler_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler_2 :: EU Stall */
#define chv__sampler_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler_2 :: EU Both FPU Pipes Active */
#define chv__sampler_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler_2 :: VS FPU0 Pipe Active */
#define chv__sampler_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler_2 :: VS FPU1 Pipe Active */
#define chv__sampler_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler_2 :: VS Send Pipe Active */
#define chv__sampler_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler_2 :: PS FPU0 Pipe Active */
#define chv__sampler_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler_2 :: PS FPU1 Pipe Active */
#define chv__sampler_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler_2 :: PS Send Pipeline Active */
#define chv__sampler_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler_2 :: FS Both FPU Active */
#define chv__sampler_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler_2 :: Rasterized Pixels */
#define chv__sampler_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler_2 :: Early Hi-Depth Test Fails */
#define chv__sampler_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler_2 :: Early Depth Test Fails */
#define chv__sampler_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler_2 :: Samples Killed in FS */
#define chv__sampler_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler_2 :: Pixels Failing Tests */
#define chv__sampler_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler_2 :: Samples Written */
#define chv__sampler_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler_2 :: Samples Blended */
#define chv__sampler_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler_2 :: Sampler Texels */
#define chv__sampler_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler_2 :: Sampler Texels Misses */
#define chv__sampler_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler_2 :: SLM Bytes Read */
#define chv__sampler_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler_2 :: SLM Bytes Written */
#define chv__sampler_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler_2 :: Shader Memory Accesses */
#define chv__sampler_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler_2 :: Shader Atomic Memory Accesses */
#define chv__sampler_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler_2 :: L3 Shader Throughput */
#define chv__sampler_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set Sampler_2 :: Shader Barrier Messages */
#define chv__sampler_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler_2 :: Slice0 Subslice1 Input Available */
#define chv__sampler_2__sampler01_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set Sampler_2 :: Slice0 Subslice2 Input Available */
#define chv__sampler_2__sampler02_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler_2 :: Slice0 Subslice0 Input Available */
#define chv__sampler_2__sampler00_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler_2 :: Slice0 Subslice2 Sampler Output Ready */
#define chv__sampler_2__sampler02_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler_2 :: Slice0 Subslice0 Sampler Output Ready */
#define chv__sampler_2__sampler00_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler_2 :: Slice0 Subslice1 Sampler Output Ready */
#define chv__sampler_2__sampler01_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler_2 :: SQ is full */
#define chv__sampler_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* ---- chv__tdl_1__* aliases (metric set TDL_1) ----
 *
 * Every macro in this group is an object-like alias: the chv__tdl_1__ name
 * expands to the identifier of an existing hsw__ or bdw__ symbol, so these
 * lines define no functions of their own.
 *
 * Several targets are named after a different counter than the alias, e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read, and the thread-dispatch /
 * thread-header counters -> sampler and pipe-profile names.
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical; confirm in the generator before "correcting" a
 * target by hand (this file is autogenerated).
 */

/* Metric set TDL_1 :: GPU Time Elapsed */
#define chv__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_1 :: GPU Core Clocks */
#define chv__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_1 :: AVG GPU Core Frequency */
#define chv__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_1 :: AVG GPU Core Frequency (alias for the __max symbol, not __read) */
#define chv__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_1 :: GPU Busy */
#define chv__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_1 :: VS Threads Dispatched */
#define chv__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_1 :: HS Threads Dispatched */
#define chv__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_1 :: DS Threads Dispatched */
#define chv__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_1 :: GS Threads Dispatched */
#define chv__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_1 :: FS Threads Dispatched */
#define chv__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_1 :: CS Threads Dispatched */
#define chv__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_1 :: EU Active */
#define chv__tdl_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_1 :: EU Stall */
#define chv__tdl_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_1 :: EU Both FPU Pipes Active */
#define chv__tdl_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_1 :: VS FPU0 Pipe Active */
#define chv__tdl_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_1 :: VS FPU1 Pipe Active */
#define chv__tdl_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_1 :: VS Send Pipe Active */
#define chv__tdl_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define chv__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define chv__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define chv__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define chv__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: Rasterized Pixels */
#define chv__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define chv__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define chv__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define chv__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define chv__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define chv__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define chv__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define chv__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define chv__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define chv__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define chv__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define chv__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define chv__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define chv__tdl_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define chv__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice1 */
#define chv__tdl_1__non_ps_thread11_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice0 */
#define chv__tdl_1__ps_thread10_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice0 */
#define chv__tdl_1__non_ps_thread10_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice2 */
#define chv__tdl_1__ps_thread12_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice1 Subslice2 */
#define chv__tdl_1__non_ps_thread12_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice1 Subslice1 */
#define chv__tdl_1__ps_thread11_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define chv__tdl_1__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define chv__tdl_1__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define chv__tdl_1__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define chv__tdl_1__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define chv__tdl_1__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_1 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define chv__tdl_1__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_1 :: SQ is full */
#define chv__tdl_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * chv "TDL_2" metric set.
 *
 * Every reader in this set is a preprocessor alias for an hsw__* or bdw__*
 * reader defined elsewhere (most targets are not visible in this section).
 * NOTE(review): several targets are named after unrelated counters, e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> early_depth_test_fails,
 * and the thread-dispatch / thread-header readers -> sampler, bottleneck and
 * stall readers. Presumably the generator reuses any existing reader whose
 * equation is identical -- confirm against the generator.
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define chv__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define chv__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define chv__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency (the __max companion of the
 * __read alias above)
 */
#define chv__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define chv__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define chv__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define chv__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define chv__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define chv__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define chv__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define chv__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define chv__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define chv__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define chv__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define chv__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define chv__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define chv__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define chv__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define chv__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define chv__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define chv__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define chv__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define chv__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define chv__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define chv__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define chv__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define chv__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define chv__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define chv__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define chv__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define chv__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define chv__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define chv__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define chv__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define chv__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define chv__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define chv__tdl_2__non_ps_thread02_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_2 :: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define chv__tdl_2__ps_thread02_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_2 :: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define chv__tdl_2__ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_2 :: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define chv__tdl_2__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_2 :: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define chv__tdl_2__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_2 :: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define chv__tdl_2__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice2 Port 0 */
#define chv__tdl_2__thread_header12_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice2 Port 1 */
#define chv__tdl_2__thread_header12_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice1 Port 1 */
#define chv__tdl_2__thread_header11_ready_port1__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice0 Port 0 */
#define chv__tdl_2__thread_header10_ready_port0__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice0 Port 1 */
#define chv__tdl_2__thread_header10_ready_port1__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice1 Subslice1 Port 0 */
#define chv__tdl_2__thread_header11_ready_port0__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_2 :: SQ is full */
#define chv__tdl_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * chv "MDAPI testing" (test_oa) metric set.
 *
 * Every reader in this set is a preprocessor alias for an hsw__* or bdw__*
 * reader defined elsewhere. TestCounter0..8 alias hsw compute_extended /
 * memory_reads readers whose names describe other counters.
 * NOTE(review): presumably the generator reuses any existing reader whose
 * equation is identical -- confirm against the generator.
 */

/* MDAPI testing set :: GPU Time Elapsed */
#define chv__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* MDAPI testing set :: GPU Core Clocks */
#define chv__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing set :: AVG GPU Core Frequency */
#define chv__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing set :: AVG GPU Core Frequency (the __max companion of the
 * __read alias above)
 */
#define chv__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing set :: TestCounter0 */
#define chv__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing set :: TestCounter1 */
#define chv__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing set :: TestCounter2 */
#define chv__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing set :: TestCounter3 */
#define chv__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing set :: TestCounter4 */
#define chv__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing set :: TestCounter5 */
#define chv__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing set :: TestCounter6 */
#define chv__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing set :: TestCounter7 */
#define chv__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* MDAPI testing set :: TestCounter8 */
#define chv__test_oa__counter8__read \
   hsw__memory_reads__gpu_core_clocks__read

/*
 * sklgt2 "Render Metrics Basic" set.
 *
 * The readers in this run are preprocessor aliases for hsw__* or bdw__*
 * readers defined elsewhere (most targets are not visible in this section).
 * NOTE(review): gs_threads aliases the hsw vs_threads reader; presumably the
 * generator reuses any existing reader whose equation is identical --
 * confirm against the generator.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define sklgt2__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define sklgt2__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt2__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (the __max companion of
 * the __read alias above)
 */
#define sklgt2__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define sklgt2__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define sklgt2__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define sklgt2__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define sklgt2__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define sklgt2__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define sklgt2__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define sklgt2__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define sklgt2__render_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define sklgt2__render_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt2__render_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic set :: VS FPU0 Pipe Active */
#define sklgt2__render_basic__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic set :: VS FPU1 Pipe Active */
#define sklgt2__render_basic__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic set :: VS Send Pipe Active */
#define sklgt2__render_basic__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define sklgt2__render_basic__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define sklgt2__render_basic__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define sklgt2__render_basic__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Render Metrics Basic set :: FS Both FPU Active */
#define sklgt2__render_basic__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Render Metrics Basic set :: Sampler 0 Busy */
#define sklgt2__render_basic__sampler0_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler 1 Busy */
#define sklgt2__render_basic__sampler1_busy__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define sklgt2__render_basic__samplers_busy__read \
   bdw__render_basic__samplers_busy__read

/* Render Metrics Basic set :: Sampler 0 Bottleneck */
#define sklgt2__render_basic__sampler0_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics Basic set :: Sampler 1 Bottleneck */
#define sklgt2__render_basic__sampler1_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define sklgt2__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt2__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define sklgt2__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define sklgt2__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define sklgt2__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define sklgt2__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define sklgt2__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define sklgt2__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define sklgt2__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses
 *
 * Returns the accumulated value at index (query->b_offset + 4) of
 * results->accumulator, multiplied by 8. `perf` is not used.
 */
static uint64_t
sklgt2__render_basic__sampler_l1_misses__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ 8 UMUL */
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   return b4 * 8;
}

/* The four readers below are preprocessor aliases for the same-named
 * bdw render_basic readers, which are defined elsewhere.
 */

/* Render Metrics Basic set :: SLM Bytes Read */
#define sklgt2__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define sklgt2__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define sklgt2__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt2__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC
 *
 * Returns the sum of this set's Sampler Cache Misses reader and its
 * Shader Memory Accesses reader, both evaluated on the same
 * (perf, query, results) arguments.
 */
static uint64_t
sklgt2__render_basic__l3_lookups__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: $SamplerL1Misses $ShaderMemoryAccesses UADD */
   return sklgt2__render_basic__sampler_l1_misses__read(perf, query, results) +
          sklgt2__render_basic__shader_memory_accesses__read(perf, query, results);
}

/* Render Metrics Basic set :: L3 Misses
 *
 * NOTE(review): the alias target is named after a different counter
 * (hsw compute_extended typed_atomics0). Presumably the generator reuses any
 * existing reader whose equation is identical -- confirm against the
 * generator.
 */
#define sklgt2__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput
 *
 * Returns this set's Sampler Cache Misses reader result multiplied by 64.
 */
static uint64_t
sklgt2__render_basic__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: $SamplerL1Misses 64 UMUL */
   return sklgt2__render_basic__sampler_l1_misses__read(perf, query, results) * 64;
}

/* The remaining Render Metrics Basic readers are preprocessor aliases for
 * hsw__* or bdw__* readers defined elsewhere.
 */

/* Render Metrics Basic set :: L3 Shader Throughput */
#define sklgt2__render_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages
 *
 * NOTE(review): the alias target is named after a different counter
 * (early depth test fails). Presumably the generator reuses any existing
 * reader whose equation is identical -- confirm against the generator.
 */
#define sklgt2__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput */
#define sklgt2__render_basic__gti_vf_throughput__read \
   bdw__render_basic__gti_vf_throughput__read

/* Render Metrics Basic set :: GTI Depth Throughput */
#define sklgt2__render_basic__gti_depth_throughput__read \
   bdw__render_basic__gti_depth_throughput__read

/* Render Metrics Basic set :: GTI RCC Throughput */
#define sklgt2__render_basic__gti_rcc_throughput__read \
   bdw__render_basic__gti_rcc_throughput__read

/* Render Metrics Basic set :: GTI L3 Throughput */
#define sklgt2__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput */
#define sklgt2__render_basic__gti_hdc_lookups_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define sklgt2__render_basic__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define sklgt2__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define sklgt2__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler_bottleneck__read

/*
 * sklgt2 "Compute Metrics Basic" set.
 *
 * Every reader in this set is a preprocessor alias for an hsw__* or bdw__*
 * reader defined elsewhere (most targets are not visible in this section).
 * NOTE(review): several targets are named after other counters, e.g.
 * gs_threads -> hsw vs_threads, fpu0/fpu1/eu_send_active -> bdw vs_* readers,
 * shader_barriers -> early_depth_test_fails, gti_read_throughput ->
 * gti_l3_throughput and gti_write_throughput -> gti_hdc_lookups_throughput.
 * Presumably the generator reuses any existing reader whose equation is
 * identical -- confirm against the generator.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define sklgt2__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define sklgt2__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt2__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (the __max companion
 * of the __read alias above)
 */
#define sklgt2__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define sklgt2__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define sklgt2__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define sklgt2__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define sklgt2__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define sklgt2__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define sklgt2__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define sklgt2__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define sklgt2__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define sklgt2__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt2__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define sklgt2__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define sklgt2__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU AVG IPC Rate */
#define sklgt2__compute_basic__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Basic set :: EU Send Pipe Active */
#define sklgt2__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define sklgt2__compute_basic__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define sklgt2__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt2__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define sklgt2__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define sklgt2__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define sklgt2__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define sklgt2__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define sklgt2__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define sklgt2__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define sklgt2__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define sklgt2__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define sklgt2__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define sklgt2__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt2__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define sklgt2__compute_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define sklgt2__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read */
#define sklgt2__compute_basic__typed_bytes_read__read \
   bdw__compute_basic__typed_bytes_read__read

/* Compute Metrics Basic set :: Typed Bytes Written */
#define sklgt2__compute_basic__typed_bytes_written__read \
   bdw__compute_basic__typed_bytes_written__read

/* Compute Metrics Basic set :: Untyped Bytes Read */
#define sklgt2__compute_basic__untyped_bytes_read__read \
   bdw__compute_basic__untyped_bytes_read__read

/* Compute Metrics Basic set :: Untyped Writes */
#define sklgt2__compute_basic__untyped_bytes_written__read \
   bdw__compute_basic__untyped_bytes_written__read

/* Compute Metrics Basic set :: GTI Read Throughput */
#define sklgt2__compute_basic__gti_read_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput */
#define sklgt2__compute_basic__gti_write_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * sklgt2 "Render Metrics set for 3D Pipeline Profile".
 *
 * Every reader in this set is a preprocessor alias for an hsw__* or bdw__*
 * reader defined elsewhere (most targets are not visible in this section).
 * NOTE(review): several targets are named after other counters, e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> early_depth_test_fails,
 * and the VS/HS/DS/GS bottleneck readers -> bdw render_basic sampler
 * busy/bottleneck readers. Presumably the generator reuses any existing
 * reader whose equation is identical -- confirm against the generator.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define sklgt2__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define sklgt2__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define sklgt2__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency (the
 * __max companion of the __read alias above)
 */
#define sklgt2__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define sklgt2__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define sklgt2__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define sklgt2__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define sklgt2__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define sklgt2__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define sklgt2__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define sklgt2__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define sklgt2__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define sklgt2__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define sklgt2__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define sklgt2__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define sklgt2__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define sklgt2__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define sklgt2__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define sklgt2__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define sklgt2__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define sklgt2__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define sklgt2__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define sklgt2__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define sklgt2__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define sklgt2__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define sklgt2__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define sklgt2__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define sklgt2__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define sklgt2__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define sklgt2__render_pipe_profile__vs_bottleneck__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define sklgt2__render_pipe_profile__hs_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define sklgt2__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define sklgt2__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define sklgt2__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define sklgt2__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define sklgt2__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define sklgt2__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define sklgt2__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define sklgt2__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define sklgt2__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define sklgt2__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define sklgt2__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define sklgt2__render_pipe_profile__cl_stall__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define sklgt2__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_stall__read

/* Memory Reads Distribution metrics set :: GPU Time Elapsed */
#define sklgt2__memory_reads__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Reads Distribution metrics set :: GPU Core Clocks */
#define sklgt2__memory_reads__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt2__memory_reads__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__memory_reads__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Reads Distribution metrics set :: GPU Busy */
#define sklgt2__memory_reads__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Reads Distribution metrics set :: VS Threads Dispatched */
#define sklgt2__memory_reads__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: HS Threads Dispatched */
#define sklgt2__memory_reads__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Reads Distribution metrics set :: DS Threads Dispatched */
#define sklgt2__memory_reads__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Reads Distribution metrics set :: GS Threads Dispatched */
#define sklgt2__memory_reads__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: FS Threads Dispatched */
#define sklgt2__memory_reads__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Reads Distribution metrics set :: CS Threads Dispatched */
#define sklgt2__memory_reads__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Reads Distribution metrics set :: EU Active */
#define sklgt2__memory_reads__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Reads Distribution metrics set :: EU Stall */
#define sklgt2__memory_reads__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Reads Distribution metrics set :: Rasterized Pixels */
#define sklgt2__memory_reads__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Reads Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt2__memory_reads__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Early Depth Test Fails */
#define sklgt2__memory_reads__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Samples Killed in FS */
#define sklgt2__memory_reads__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Reads Distribution metrics set :: Pixels Failing Tests */
#define sklgt2__memory_reads__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Reads Distribution metrics set :: Samples Written */
#define sklgt2__memory_reads__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Reads Distribution metrics set :: Samples Blended */
#define sklgt2__memory_reads__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Reads Distribution metrics set :: Sampler Texels */
#define sklgt2__memory_reads__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Reads Distribution metrics set :: Sampler Texels Misses */
#define sklgt2__memory_reads__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Reads Distribution metrics set :: SLM Bytes Read */
#define sklgt2__memory_reads__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Reads Distribution metrics set :: SLM Bytes Written */
#define sklgt2__memory_reads__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Reads Distribution metrics set :: Shader Memory Accesses */
#define sklgt2__memory_reads__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Reads Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt2__memory_reads__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Reads Distribution metrics set :: Shader Barrier Messages */
#define sklgt2__memory_reads__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: GtiCmdStreamerMemoryReads */
#define sklgt2__memory_reads__gti_cmd_streamer_memory_reads__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Reads Distribution metrics set :: GtiRsMemoryReads */
#define sklgt2__memory_reads__gti_rs_memory_reads__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Reads Distribution metrics set :: GtiVfMemoryReads */
#define sklgt2__memory_reads__gti_vf_memory_reads__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Memory Reads Distribution metrics set :: GtiRccMemoryReads */
#define sklgt2__memory_reads__gti_rcc_memory_reads__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiMscMemoryReads */
#define sklgt2__memory_reads__gti_msc_memory_reads__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Reads Distribution metrics set :: GtiHizMemoryReads */
#define sklgt2__memory_reads__gti_hiz_memory_reads__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiStcMemoryReads */
#define sklgt2__memory_reads__gti_stc_memory_reads__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Reads Distribution metrics set :: GtiRczMemoryReads */
#define sklgt2__memory_reads__gti_rcz_memory_reads__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Reads Distribution metrics set :: GtiMemoryReads */
#define sklgt2__memory_reads__gti_memory_reads__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank0Reads */
#define sklgt2__memory_reads__gti_l3_bank0_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank1Reads */
#define sklgt2__memory_reads__gti_l3_bank1_reads__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Reads Distribution metrics set :: GtiL3Bank2Reads */
#define sklgt2__memory_reads__gti_l3_bank2_reads__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Reads Distribution metrics set :: GtiL3Bank3Reads */
#define sklgt2__memory_reads__gti_l3_bank3_reads__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: GtiL3Reads */
#define sklgt2__memory_reads__gti_l3_reads__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Reads Distribution metrics set :: GtiRingAccesses */
#define sklgt2__memory_reads__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/* Memory Writes Distribution metrics set :: GPU Time Elapsed */
#define sklgt2__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution metrics set :: GPU Core Clocks */
#define sklgt2__memory_writes__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt2__memory_writes__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution metrics set :: GPU Busy */
#define sklgt2__memory_writes__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Writes Distribution metrics set :: VS Threads Dispatched */
#define sklgt2__memory_writes__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: HS Threads Dispatched */
#define sklgt2__memory_writes__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Writes Distribution metrics set :: DS Threads Dispatched */
#define sklgt2__memory_writes__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Writes Distribution metrics set :: GS Threads Dispatched */
#define sklgt2__memory_writes__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: FS Threads Dispatched */
#define sklgt2__memory_writes__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Writes Distribution metrics set :: CS Threads Dispatched */
#define sklgt2__memory_writes__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Writes Distribution metrics set :: EU Active */
#define sklgt2__memory_writes__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Writes Distribution metrics set :: EU Stall */
#define sklgt2__memory_writes__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Writes Distribution metrics set :: Rasterized Pixels */
#define sklgt2__memory_writes__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Writes Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt2__memory_writes__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Early Depth Test Fails */
#define sklgt2__memory_writes__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Samples Killed in FS */
#define sklgt2__memory_writes__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution metrics set :: Pixels Failing Tests */
#define sklgt2__memory_writes__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Writes Distribution metrics set :: Samples Written */
#define sklgt2__memory_writes__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Writes Distribution metrics set :: Samples Blended */
#define sklgt2__memory_writes__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Writes Distribution metrics set :: Sampler Texels */
#define sklgt2__memory_writes__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Writes Distribution metrics set :: Sampler Texels Misses */
#define sklgt2__memory_writes__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Writes Distribution metrics set :: SLM Bytes Read */
#define sklgt2__memory_writes__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Writes Distribution metrics set :: SLM Bytes Written */
#define sklgt2__memory_writes__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Writes Distribution metrics set :: Shader Memory Accesses */
#define sklgt2__memory_writes__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Writes Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt2__memory_writes__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Writes Distribution metrics set :: L3 Shader Throughput */
#define sklgt2__memory_writes__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Writes Distribution metrics set :: Shader Barrier Messages */
#define sklgt2__memory_writes__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: GtiCmdStreamerMemoryWrites */
#define sklgt2__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metrics set :: GtiSoMemoryWrites */
#define sklgt2__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metrics set :: GtiRccMemoryWrites */
#define sklgt2__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiMscMemoryWrites */
#define sklgt2__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metrics set :: GtiHizMemoryWrites */
#define sklgt2__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiStcMemoryWrites */
#define sklgt2__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define sklgt2__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define sklgt2__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define sklgt2__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define sklgt2__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define sklgt2__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define sklgt2__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define sklgt2__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define sklgt2__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/* Compute Metrics Extended metrics set :: GPU Time Elapsed */
#define sklgt2__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended metrics set :: GPU Core Clocks */
#define sklgt2__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metrics set :: AVG GPU Core Frequency */
#define sklgt2__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended metrics set :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended metrics set :: CS Threads Dispatched */
#define sklgt2__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended metrics set :: EU Active */
#define sklgt2__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended metrics set :: EU Stall */
#define sklgt2__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended metrics set :: EU Both FPU Pipes Active */
#define sklgt2__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended metrics set :: EU FPU0 Pipe Active */
#define sklgt2__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended metrics set :: EU FPU1 Pipe Active */
#define sklgt2__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended metrics set :: EU AVG IPC Rate */
#define sklgt2__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended metrics set :: EU Send Pipe Active */
#define sklgt2__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended metrics set :: EU Thread Occupancy */
#define sklgt2__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended metrics set :: Sampler Texels */
#define sklgt2__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended metrics set :: Sampler Texels Misses */
#define sklgt2__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended metrics set :: SLM Bytes Read */
#define sklgt2__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended metrics set :: SLM Bytes Written */
#define sklgt2__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended metrics set :: Shader Memory Accesses */
#define sklgt2__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended metrics set :: Shader Atomic Memory Accesses */
#define sklgt2__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended metrics set :: L3 Shader Throughput */
#define sklgt2__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended metrics set :: Shader Barrier Messages */
#define sklgt2__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended metrics set :: EuUntypedReads0 */
#define sklgt2__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended metrics set :: EuTypedReads0 */
#define sklgt2__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended metrics set :: EuUntypedWrites0 */
#define sklgt2__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended metrics set :: EuTypedWrites0 */
#define sklgt2__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended metrics set :: EuUntypedAtomics0 */
#define sklgt2__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended metrics set :: EuTypedAtomics0 */
#define sklgt2__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended metrics set :: EuA64UntypedReads0 */
#define sklgt2__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended metrics set :: EuA64UntypedWrites0 */
#define sklgt2__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended metrics set :: Typed Reads 0 */
#define sklgt2__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metrics set :: Typed Writes 0 */
#define sklgt2__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended metrics set :: Untyped Reads 0 */
#define sklgt2__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended metrics set :: Untyped Writes 0 */
#define sklgt2__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended metrics set :: Typed Atomics 0 */
#define sklgt2__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended metrics set :: TypedReadsPerCacheLine */
#define sklgt2__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended metrics set :: TypedWritesPerCacheLine */
#define sklgt2__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended metrics set :: UntypedReadsPerCacheLine */
#define sklgt2__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended metrics set :: UntypedWritesPerCacheLine */
#define sklgt2__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended metrics set :: TypedAtomicsPerCacheLine */
#define sklgt2__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/* Compute Metrics L3 Cache metrics set :: GPU Time Elapsed */
#define sklgt2__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache metrics set :: GPU Core Clocks */
#define sklgt2__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache metrics set :: AVG GPU Core Frequency */
#define sklgt2__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache metrics set :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache metrics set :: GPU Busy */
#define sklgt2__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache metrics set :: VS Threads Dispatched */
#define sklgt2__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metrics set :: HS Threads Dispatched */
#define sklgt2__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache metrics set :: DS Threads Dispatched */
#define sklgt2__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache metrics set :: GS Threads Dispatched */
#define sklgt2__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metrics set :: FS Threads Dispatched */
#define sklgt2__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache metrics set :: CS Threads Dispatched */
#define sklgt2__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache metrics set :: EU Active */
#define sklgt2__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache metrics set :: EU Stall */
#define sklgt2__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache metrics set :: EU Both FPU Pipes Active */
#define sklgt2__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Pipe Active */
#define sklgt2__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Pipe Active */
#define sklgt2__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache metrics set :: EU AVG IPC Rate */
#define sklgt2__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache metrics set :: EU Send Pipe Active */
#define sklgt2__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Hybrid Instruction */
#define sklgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Hybrid Instruction */
#define sklgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Ternary Instruction */
#define sklgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Ternary Instruction */
#define sklgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Binary Instruction */
#define sklgt2__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Binary Instruction */
#define sklgt2__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Move Instruction */
#define sklgt2__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Move Instruction */
#define sklgt2__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache metrics set :: Rasterized Pixels */
#define sklgt2__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache metrics set :: Early Hi-Depth Test Fails */
#define sklgt2__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: Early Depth Test Fails */
#define sklgt2__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: Samples Killed in FS */
#define sklgt2__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache metrics set :: Pixels Failing Tests */
#define sklgt2__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache metrics set :: Samples Written */
#define sklgt2__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache metrics set :: Samples Blended */
#define sklgt2__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache metrics set :: Sampler Accesses */
#define sklgt2__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache metrics set :: Sampler Texels */
#define sklgt2__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache metrics set :: Sampler Texels Misses */
#define sklgt2__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache metrics set :: SLM Bytes Read */
#define sklgt2__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache metrics set :: SLM Bytes Written */
#define sklgt2__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache metrics set :: Shader Memory Accesses */
#define sklgt2__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache metrics set :: Shader Atomic Memory Accesses */
#define sklgt2__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache metrics set :: L3 Accesses
 *
 * Reads four accumulator slots -- C0 and C1 (relative to query->c_offset)
 * and B2 and B3 (relative to query->b_offset) -- adds them together and
 * returns twice that sum. `perf` is not consulted.
 */
static uint64_t
sklgt2__compute_l3_cache__l3_accesses__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: C 0 READ C 1 READ B 2 READ B 3 READ UADD UADD UADD 2 UMUL */
   const uint64_t c0 = results->accumulator[query->c_offset + 0];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   /* Nesting mirrors the RPN stack evaluation: C0 + (C1 + (B2 + B3)). */
   const uint64_t counter_sum = c0 + (c1 + (b2 + b3));

   return counter_sum * 2;
}

/* Compute Metrics L3 Cache metrics set :: L3 Misses */
#define sklgt2__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache metrics set :: L3 Sampler Throughput */
#define sklgt2__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache metrics set :: L3 Shader Throughput */
#define sklgt2__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache metrics set :: L3 Total Throughput
 *
 * Returns the L3 Accesses value for this metric set multiplied by 64.
 * All three arguments are forwarded unchanged to the L3 Accesses reader.
 */
static uint64_t
sklgt2__compute_l3_cache__l3_total_throughput__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: $L3Accesses 64 UMUL */
   const uint64_t l3_accesses =
      sklgt2__compute_l3_cache__l3_accesses__read(perf, query, results);

   return l3_accesses * 64;
}

/*
 * sklgt2 Compute Metrics L3 Cache set: remaining reader aliases.
 *
 * Each name below is a preprocessor alias that expands to a reader named
 * with an hsw__ or bdw__ prefix; no function bodies are defined here.
 *
 * NOTE(review): shader_barriers expands to the hsw early_depth_test_fails
 * reader, whose name does not match the counter. Presumably the generator
 * shares one reader between counters with identical equations -- confirm
 * against the generator before changing any target.
 */

/* Compute Metrics L3 Cache metrics set :: Shader Barrier Messages */
#define sklgt2__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 00 Accesses */
#define sklgt2__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 01 Accesses */
#define sklgt2__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 02 Accesses */
#define sklgt2__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 03 Accesses */
#define sklgt2__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 00 IC Accesses */
#define sklgt2__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 00 IC Hits */
#define sklgt2__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache metrics set :: GTI L3 Throughput */
#define sklgt2__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache metrics set :: GTI Read Throughput */
#define sklgt2__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache metrics set :: GTI Write Throughput */
#define sklgt2__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * sklgt2 HDCAndSF metric set: reader aliases.
 *
 * Every sklgt2__hdc_and_sf__* name below is a preprocessor alias that
 * expands to a reader named with an hsw__ or bdw__ prefix; no function
 * bodies are defined in this set.
 *
 * NOTE(review): several targets have names that do not match the counter
 * being aliased (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, poly_data_ready -> bdw sampler0_busy, and the
 * shader00 / shader02 stall counters expand crosswise to the bdw shader02 /
 * shader00 readers). Presumably the generator shares one reader between
 * counters with identical equations -- confirm against the generator
 * before changing any target.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define sklgt2__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define sklgt2__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define sklgt2__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define sklgt2__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define sklgt2__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define sklgt2__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define sklgt2__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define sklgt2__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define sklgt2__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define sklgt2__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define sklgt2__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define sklgt2__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define sklgt2__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define sklgt2__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define sklgt2__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define sklgt2__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define sklgt2__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define sklgt2__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define sklgt2__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define sklgt2__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define sklgt2__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define sklgt2__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define sklgt2__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define sklgt2__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define sklgt2__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define sklgt2__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define sklgt2__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define sklgt2__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define sklgt2__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define sklgt2__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define sklgt2__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define sklgt2__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define sklgt2__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define sklgt2__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define sklgt2__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define sklgt2__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define sklgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define sklgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define sklgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define sklgt2__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt2 L3_1 metric set: reader aliases.
 *
 * Every sklgt2__l3_1__* name below is a preprocessor alias that expands to
 * a reader named with an hsw__ or bdw__ prefix; no function bodies are
 * defined in this set.
 *
 * NOTE(review): several targets have names that do not match the counter
 * being aliased (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, and the l30_bank* counters -> bdw sampler0_busy
 * and render_pipe_profile *_bottleneck readers). Presumably the generator
 * shares one reader between counters with identical equations -- confirm
 * against the generator before changing any target.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define sklgt2__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define sklgt2__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define sklgt2__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define sklgt2__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define sklgt2__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define sklgt2__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define sklgt2__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define sklgt2__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define sklgt2__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define sklgt2__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define sklgt2__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define sklgt2__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define sklgt2__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define sklgt2__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define sklgt2__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define sklgt2__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define sklgt2__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define sklgt2__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define sklgt2__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define sklgt2__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define sklgt2__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define sklgt2__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define sklgt2__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define sklgt2__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define sklgt2__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define sklgt2__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define sklgt2__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define sklgt2__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define sklgt2__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define sklgt2__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define sklgt2__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define sklgt2__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define sklgt2__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define sklgt2__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define sklgt2__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define sklgt2__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define sklgt2__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define sklgt2__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define sklgt2__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define sklgt2__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt2 L3_2 metric set: reader aliases.
 *
 * Every sklgt2__l3_2__* name below is a preprocessor alias that expands to
 * a reader named with an hsw__ or bdw__ prefix; no function bodies are
 * defined in this set.
 *
 * NOTE(review): several targets have names that do not match the counter
 * being aliased (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank2_stalled -> bdw sampler0_busy,
 * l30_bank2_active -> bdw bc_bottleneck). Presumably the generator shares
 * one reader between counters with identical equations -- confirm against
 * the generator before changing any target.
 */

/* Metric set L3_2 :: GPU Time Elapsed */
#define sklgt2__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define sklgt2__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency */
#define sklgt2__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define sklgt2__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define sklgt2__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define sklgt2__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define sklgt2__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define sklgt2__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define sklgt2__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define sklgt2__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define sklgt2__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define sklgt2__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define sklgt2__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define sklgt2__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define sklgt2__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define sklgt2__l3_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define sklgt2__l3_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define sklgt2__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define sklgt2__l3_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define sklgt2__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define sklgt2__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define sklgt2__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define sklgt2__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define sklgt2__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define sklgt2__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define sklgt2__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define sklgt2__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define sklgt2__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define sklgt2__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define sklgt2__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define sklgt2__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define sklgt2__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define sklgt2__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define sklgt2__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define sklgt2__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Stalled */
#define sklgt2__l3_2__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Active */
#define sklgt2__l3_2__l30_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define sklgt2__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt2 L3_3 metric set: reader aliases.
 *
 * Every sklgt2__l3_3__* name below is a preprocessor alias that expands to
 * a reader named with an hsw__ or bdw__ prefix; no function bodies are
 * defined in this set.
 *
 * NOTE(review): several targets have names that do not match the counter
 * being aliased (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank3_stalled -> bdw sampler0_busy,
 * l30_bank3_active -> bdw bc_bottleneck). Presumably the generator shares
 * one reader between counters with identical equations -- confirm against
 * the generator before changing any target.
 */

/* Metric set L3_3 :: GPU Time Elapsed */
#define sklgt2__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_3 :: GPU Core Clocks */
#define sklgt2__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_3 :: AVG GPU Core Frequency */
#define sklgt2__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_3 :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_3 :: GPU Busy */
#define sklgt2__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_3 :: VS Threads Dispatched */
#define sklgt2__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_3 :: HS Threads Dispatched */
#define sklgt2__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_3 :: DS Threads Dispatched */
#define sklgt2__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_3 :: GS Threads Dispatched */
#define sklgt2__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_3 :: FS Threads Dispatched */
#define sklgt2__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_3 :: CS Threads Dispatched */
#define sklgt2__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_3 :: EU Active */
#define sklgt2__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_3 :: EU Stall */
#define sklgt2__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_3 :: EU Both FPU Pipes Active */
#define sklgt2__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_3 :: VS FPU0 Pipe Active */
#define sklgt2__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_3 :: VS FPU1 Pipe Active */
#define sklgt2__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_3 :: VS Send Pipe Active */
#define sklgt2__l3_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_3 :: PS FPU0 Pipe Active */
#define sklgt2__l3_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_3 :: PS FPU1 Pipe Active */
#define sklgt2__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_3 :: PS Send Pipeline Active */
#define sklgt2__l3_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_3 :: FS Both FPU Active */
#define sklgt2__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_3 :: Rasterized Pixels */
#define sklgt2__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_3 :: Early Hi-Depth Test Fails */
#define sklgt2__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_3 :: Early Depth Test Fails */
#define sklgt2__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Samples Killed in FS */
#define sklgt2__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_3 :: Pixels Failing Tests */
#define sklgt2__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_3 :: Samples Written */
#define sklgt2__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_3 :: Samples Blended */
#define sklgt2__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_3 :: Sampler Texels */
#define sklgt2__l3_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_3 :: Sampler Texels Misses */
#define sklgt2__l3_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_3 :: SLM Bytes Read */
#define sklgt2__l3_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_3 :: SLM Bytes Written */
#define sklgt2__l3_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_3 :: Shader Memory Accesses */
#define sklgt2__l3_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_3 :: Shader Atomic Memory Accesses */
#define sklgt2__l3_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_3 :: L3 Shader Throughput */
#define sklgt2__l3_3__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_3 :: Shader Barrier Messages */
#define sklgt2__l3_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled */
#define sklgt2__l3_3__l30_bank3_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Active */
#define sklgt2__l3_3__l30_bank3_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_3 :: SQ is full */
#define sklgt2__l3_3__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt2 RasterizerAndPixelBackend metric set: reader aliases.
 *
 * Every sklgt2__rasterizer_and_pixel_backend__* name below is a
 * preprocessor alias that expands to a reader named with an hsw__ or bdw__
 * prefix; no function bodies are defined in this set.
 *
 * NOTE(review): several targets have names that do not match the counter
 * being aliased (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, and the Slice0 ready/available counters -> bdw
 * sampler0_busy, sampler1_busy, bc_bottleneck, hi_depth_bottleneck and
 * sf_stall readers). Presumably the generator shares one reader between
 * counters with identical equations -- confirm against the generator
 * before changing any target.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define sklgt2__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define sklgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define sklgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max variant) */
#define sklgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define sklgt2__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define sklgt2__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define sklgt2__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define sklgt2__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define sklgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define sklgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define sklgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define sklgt2__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define sklgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define sklgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define sklgt2__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define sklgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define sklgt2__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define sklgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define sklgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define sklgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define sklgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define sklgt2__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define sklgt2__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define sklgt2__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define sklgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define sklgt2__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define sklgt2__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define sklgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define sklgt2__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define sklgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define sklgt2__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define sklgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define sklgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define sklgt2__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define sklgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define sklgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define sklgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Sampler metric set (sklgt2): read callbacks.
 *
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 * NOTE(review): several targets are named after a different counter (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails); presumably the generator aliases readers whose
 * equations are identical -- confirm against gen_perf.py before treating a
 * name mismatch as a bug.
 */

/* Metric set Sampler :: GPU Time Elapsed */
#define sklgt2__sampler__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler :: GPU Core Clocks */
#define sklgt2__sampler__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler :: AVG GPU Core Frequency */
#define sklgt2__sampler__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__sampler__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler :: GPU Busy */
#define sklgt2__sampler__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler :: VS Threads Dispatched */
#define sklgt2__sampler__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler :: HS Threads Dispatched */
#define sklgt2__sampler__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler :: DS Threads Dispatched */
#define sklgt2__sampler__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler :: GS Threads Dispatched */
#define sklgt2__sampler__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler :: FS Threads Dispatched */
#define sklgt2__sampler__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler :: CS Threads Dispatched */
#define sklgt2__sampler__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler :: EU Active */
#define sklgt2__sampler__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler :: EU Stall */
#define sklgt2__sampler__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler :: EU Both FPU Pipes Active */
#define sklgt2__sampler__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler :: VS FPU0 Pipe Active */
#define sklgt2__sampler__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler :: VS FPU1 Pipe Active */
#define sklgt2__sampler__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler :: VS Send Pipe Active */
#define sklgt2__sampler__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler :: PS FPU0 Pipe Active */
#define sklgt2__sampler__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler :: PS FPU1 Pipe Active */
#define sklgt2__sampler__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler :: PS Send Pipeline Active */
#define sklgt2__sampler__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler :: FS Both FPU Active */
#define sklgt2__sampler__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler :: Rasterized Pixels */
#define sklgt2__sampler__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler :: Early Hi-Depth Test Fails */
#define sklgt2__sampler__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler :: Early Depth Test Fails */
#define sklgt2__sampler__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Samples Killed in FS */
#define sklgt2__sampler__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler :: Pixels Failing Tests */
#define sklgt2__sampler__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler :: Samples Written */
#define sklgt2__sampler__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler :: Samples Blended */
#define sklgt2__sampler__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler :: Sampler Texels */
#define sklgt2__sampler__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler :: Sampler Texels Misses */
#define sklgt2__sampler__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler :: SLM Bytes Read */
#define sklgt2__sampler__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler :: SLM Bytes Written */
#define sklgt2__sampler__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler :: Shader Memory Accesses */
#define sklgt2__sampler__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler :: Shader Atomic Memory Accesses */
#define sklgt2__sampler__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler :: L3 Shader Throughput */
#define sklgt2__sampler__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set Sampler :: Shader Barrier Messages */
#define sklgt2__sampler__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Slice0 Subslice1 Input Available */
#define sklgt2__sampler__sampler01_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set Sampler :: Slice0 Subslice2 Input Available */
#define sklgt2__sampler__sampler02_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler :: Slice0 Subslice0 Input Available */
#define sklgt2__sampler__sampler00_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice2 Sampler Output Ready */
#define sklgt2__sampler__sampler02_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice0 Sampler Output Ready */
#define sklgt2__sampler__sampler00_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice1 Sampler Output Ready */
#define sklgt2__sampler__sampler01_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler :: SQ is full */
#define sklgt2__sampler__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * TDL_1 metric set (sklgt2): read callbacks.
 *
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 * NOTE(review): several targets are named after a different counter (e.g.
 * the "ready for dispatch" counters forward to sampler busy/bottleneck
 * readers); presumably the generator aliases readers whose equations are
 * identical -- confirm against gen_perf.py before treating a name mismatch
 * as a bug.
 */

/* Metric set TDL_1 :: GPU Time Elapsed */
#define sklgt2__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_1 :: GPU Core Clocks */
#define sklgt2__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_1 :: AVG GPU Core Frequency */
#define sklgt2__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_1 :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_1 :: GPU Busy */
#define sklgt2__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_1 :: VS Threads Dispatched */
#define sklgt2__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_1 :: HS Threads Dispatched */
#define sklgt2__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_1 :: DS Threads Dispatched */
#define sklgt2__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_1 :: GS Threads Dispatched */
#define sklgt2__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_1 :: FS Threads Dispatched */
#define sklgt2__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_1 :: CS Threads Dispatched */
#define sklgt2__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_1 :: EU Active */
#define sklgt2__tdl_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_1 :: EU Stall */
#define sklgt2__tdl_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_1 :: EU Both FPU Pipes Active */
#define sklgt2__tdl_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_1 :: VS FPU0 Pipe Active */
#define sklgt2__tdl_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_1 :: VS FPU1 Pipe Active */
#define sklgt2__tdl_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_1 :: VS Send Pipe Active */
#define sklgt2__tdl_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define sklgt2__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define sklgt2__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define sklgt2__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define sklgt2__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: Rasterized Pixels */
#define sklgt2__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define sklgt2__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define sklgt2__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define sklgt2__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define sklgt2__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define sklgt2__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define sklgt2__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define sklgt2__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define sklgt2__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define sklgt2__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define sklgt2__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define sklgt2__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define sklgt2__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define sklgt2__tdl_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define sklgt2__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt2__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt2__tdl_1__ps_thread02_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt2__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: SQ is full */
#define sklgt2__tdl_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * TDL_2 metric set (sklgt2): read callbacks.
 *
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 * NOTE(review): several targets are named after a different counter (e.g.
 * the "thread header ready" counters forward to render_pipe_profile
 * bottleneck/stall readers); presumably the generator aliases readers whose
 * equations are identical -- confirm against gen_perf.py before treating a
 * name mismatch as a bug.
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define sklgt2__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define sklgt2__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define sklgt2__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define sklgt2__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define sklgt2__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define sklgt2__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define sklgt2__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define sklgt2__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define sklgt2__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define sklgt2__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define sklgt2__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define sklgt2__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define sklgt2__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define sklgt2__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define sklgt2__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define sklgt2__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define sklgt2__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define sklgt2__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define sklgt2__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define sklgt2__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define sklgt2__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define sklgt2__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define sklgt2__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define sklgt2__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define sklgt2__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define sklgt2__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define sklgt2__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define sklgt2__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define sklgt2__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define sklgt2__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define sklgt2__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define sklgt2__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define sklgt2__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define sklgt2__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define sklgt2__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define sklgt2__tdl_2__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define sklgt2__tdl_2__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define sklgt2__tdl_2__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define sklgt2__tdl_2__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define sklgt2__tdl_2__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define sklgt2__tdl_2__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_2 :: SQ is full */
#define sklgt2__tdl_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Compute Metrics Extra set (sklgt2): counters that are plain macro aliases.
 * Each sklgt2 name forwards to a reader that carries a bdw__ or hsw__ name.
 * NOTE(review): fpu1_active forwards to a reader named eu_stall; presumably
 * the generator aliases readers whose equations are identical -- confirm
 * against gen_perf.py before treating the name mismatch as a bug.
 */

/* Compute Metrics Extra set :: GPU Time Elapsed */
#define sklgt2__compute_extra__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extra set :: GPU Core Clocks */
#define sklgt2__compute_extra__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency */
#define sklgt2__compute_extra__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__compute_extra__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extra set :: EU FPU1 Pipe Active */
#define sklgt2__compute_extra__fpu1_active__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extra set :: EU FPU1 Pipe Active including Ext Math
 *
 * Evaluates (A8 + 8 * (C7 + C6 + C5)) * 100, divided first by the EU count
 * from perf->sys_vars.n_eus and then by the set's GPU core clocks reader.
 * Either division yields 0 when its divisor is 0.
 */
static float
sklgt2__compute_extra__fpu1_active_adjusted__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: A 8 READ  C 7 READ C 6 READ FADD C 5 READ FADD 8 FMUL FADD 100 FMUL $EuCoresTotalCount FDIV $GpuCoreClocks FDIV */
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   /* C7 + C6 is added as integers and only then widened; C5 joins in double. */
   double c_total = c7 + c6;
   c_total = c_total + c5;

   const double c_weighted = c_total * 8;
   const double combined = a8 + c_weighted;
   const double scaled = combined * 100;

   /* Normalise per EU, guarding against a zero EU count. */
   const double eu_count = perf->sys_vars.n_eus;
   double per_eu = 0;
   if (eu_count)
      per_eu = scaled / eu_count;

   /* Normalise per GPU core clock, guarding against a zero clock count. */
   const double core_clocks = sklgt2__compute_extra__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return per_eu / core_clocks;
}

/*
 * Media Vme Pipe metrics set (sklgt2): read callbacks.
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 */

/* Media Vme Pipe metrics set :: GPU Time Elapsed */
#define sklgt2__vme_pipe__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Media Vme Pipe metrics set :: GPU Core Clocks */
#define sklgt2__vme_pipe__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Media Vme Pipe metrics set :: AVG GPU Core Frequency */
#define sklgt2__vme_pipe__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Media Vme Pipe metrics set :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__vme_pipe__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Media Vme Pipe metrics set :: GPU Busy */
#define sklgt2__vme_pipe__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Media Vme Pipe metrics set :: CS Threads Dispatched */
#define sklgt2__vme_pipe__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Media Vme Pipe metrics set :: EU Active */
#define sklgt2__vme_pipe__eu_active__read \
   bdw__render_basic__eu_active__read

/* Media Vme Pipe metrics set :: EU Stall */
#define sklgt2__vme_pipe__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Media Vme Pipe metrics set :: EU Both FPU Pipes Active */
#define sklgt2__vme_pipe__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Media Vme Pipe metrics set :: EU Thread Occupancy */
#define sklgt2__vme_pipe__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* Media Vme Pipe metrics set :: VME Busy */
#define sklgt2__vme_pipe__vme_busy__read \
   bdw__vme_pipe__vme_busy__read

/*
 * Gpu Rings Busyness set (sklgt2): read callbacks.
 *
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 * NOTE(review): the ring-busy counters forward to readers named after
 * render_pipe_profile / sampler counters; presumably the generator aliases
 * readers whose equations are identical -- confirm against gen_perf.py
 * before treating a name mismatch as a bug.
 */

/* Gpu Rings Busyness :: GPU Time Elapsed */
#define sklgt2__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness :: GPU Core Clocks */
#define sklgt2__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency */
#define sklgt2__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness :: Render Ring Busy */
#define sklgt2__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness :: Vdbox0 Ring Busy */
#define sklgt2__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness :: Vebox Ring Busy */
#define sklgt2__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness :: Blitter Ring Busy */
#define sklgt2__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness :: AnyRingBusy */
#define sklgt2__gpu_busyness__any_ring_busy__read \
   bdw__render_basic__sampler0_busy__read

/*
 * FF Bottlenecks set (sklgt2): counters that are plain macro aliases.
 *
 * Each sklgt2 name in this run forwards to a reader that carries a bdw__ or
 * hsw__ name, so no new function is emitted for it.
 * NOTE(review): gs_threads forwards to a reader named hsw vs_threads;
 * presumably the generator aliases readers whose equations are identical --
 * confirm against gen_perf.py before treating the name mismatch as a bug.
 */

/* FF Bottlenecks :: GPU Time Elapsed */
#define sklgt2__ff_bottlenecks__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* FF Bottlenecks :: GPU Core Clocks */
#define sklgt2__ff_bottlenecks__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* FF Bottlenecks :: AVG GPU Core Frequency */
#define sklgt2__ff_bottlenecks__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* FF Bottlenecks :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__ff_bottlenecks__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* FF Bottlenecks :: GPU Busy */
#define sklgt2__ff_bottlenecks__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* FF Bottlenecks :: VS Threads Dispatched */
#define sklgt2__ff_bottlenecks__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* FF Bottlenecks :: HS Threads Dispatched */
#define sklgt2__ff_bottlenecks__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* FF Bottlenecks :: DS Threads Dispatched */
#define sklgt2__ff_bottlenecks__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* FF Bottlenecks :: GS Threads Dispatched */
#define sklgt2__ff_bottlenecks__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* FF Bottlenecks :: FS Threads Dispatched */
#define sklgt2__ff_bottlenecks__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* FF Bottlenecks :: CS Threads Dispatched */
#define sklgt2__ff_bottlenecks__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* FF Bottlenecks :: Rasterized Pixels */
#define sklgt2__ff_bottlenecks__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* FF Bottlenecks :: Early Hi-Depth Test Fails */
#define sklgt2__ff_bottlenecks__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* FF Bottlenecks :: Early Depth Test Fails */
#define sklgt2__ff_bottlenecks__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* FF Bottlenecks :: Samples Killed in FS */
#define sklgt2__ff_bottlenecks__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* FF Bottlenecks :: Pixels Failing Tests */
#define sklgt2__ff_bottlenecks__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* FF Bottlenecks :: Samples Written */
#define sklgt2__ff_bottlenecks__samples_written__read \
   bdw__render_basic__samples_written__read

/* FF Bottlenecks :: Samples Blended */
#define sklgt2__ff_bottlenecks__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* FF Bottlenecks :: EU Active */
#define sklgt2__ff_bottlenecks__eu_active__read \
   bdw__render_basic__eu_active__read

/* FF Bottlenecks :: EU Stall */
#define sklgt2__ff_bottlenecks__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* FF Bottlenecks :: EU Both FPU Pipes Active */
#define sklgt2__ff_bottlenecks__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* FF Bottlenecks :: EU Thread Occupancy
 *
 * Evaluates 8 * A17 / eu_threads_count, then continues in unsigned integer
 * arithmetic (divide by n_eus, multiply by 100) before the final division
 * by the set's GPU core clocks reader. Every division yields 0 when its
 * divisor is 0.
 */
static float
sklgt2__ff_bottlenecks__eu_thread_occupancy__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: 8 A 17 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a17 = results->accumulator[query->a_offset + 17];

   /* The multiply by 8 is done on the integer value, then widened. */
   const double scaled = 8 * a17;

   const double thread_count = perf->sys_vars.eu_threads_count;
   double per_thread = 0;
   if (thread_count)
      per_thread = scaled / thread_count;

   /* UDIV / UMUL operate on unsigned integers, so the fraction is dropped
    * here and again by the integer division below.
    */
   uint64_t occupancy = per_thread;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   if (eu_count)
      occupancy = occupancy / eu_count;
   else
      occupancy = 0;
   occupancy = occupancy * 100;

   const double occupancy_pct = occupancy;
   const double core_clocks = sklgt2__ff_bottlenecks__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return occupancy_pct / core_clocks;
}

/*
 * FF Bottlenecks FPU pipe counters: plain macro aliases onto bdw__ readers.
 * NOTE(review): the two PS counters forward to compute_l3_cache "hybrid fpu
 * instruction" readers rather than render_basic ps_fpu readers; presumably
 * the generator picked them because the equations match -- confirm against
 * gen_perf.py.
 */

/* FF Bottlenecks :: VS FPU0 Pipe Active */
#define sklgt2__ff_bottlenecks__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* FF Bottlenecks :: VS FPU1 Pipe Active */
#define sklgt2__ff_bottlenecks__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* FF Bottlenecks :: PS FPU0 Pipe Active */
#define sklgt2__ff_bottlenecks__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* FF Bottlenecks :: PS FPU1 Pipe Active */
#define sklgt2__ff_bottlenecks__ps_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* FF Bottlenecks :: VS is sending vertices to CL
 *
 * Averages counters B0 and B4, scales the mean by 100 (truncating to an
 * unsigned integer) and divides by the set's GPU core clocks reader.
 * Returns 0 when the clock count is 0.
 */
static float
sklgt2__ff_bottlenecks__vs_cl_bypass__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ B 4 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   /* The two samples are added as integers and only then widened. */
   const double sum = b0 + b4;
   const double mean = sum / 2.0;

   /* UMUL: the scaled mean is truncated to an unsigned integer. */
   const uint64_t scaled = mean * 100;
   const double scaled_pct = scaled;

   const double core_clocks = sklgt2__ff_bottlenecks__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return scaled_pct / core_clocks;
}

/* FF Bottlenecks :: HS Bottleneck
 *
 * Macro alias onto a bdw__ reader; no function is emitted for this counter.
 * NOTE(review): the target is named sampler1_bottleneck; presumably chosen
 * because its equation matches -- confirm against gen_perf.py.
 */
#define sklgt2__ff_bottlenecks__hs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* FF Bottlenecks :: DS Bottleneck
 *
 * Averages counters B1 and B5, scales the mean by 100 (truncating to an
 * unsigned integer) and divides by the set's GPU core clocks reader.
 * Returns 0 when the clock count is 0.
 */
static float
sklgt2__ff_bottlenecks__ds_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: B 1 READ B 5 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];

   /* The two samples are added as integers and only then widened. */
   const double sum = b1 + b5;
   const double mean = sum / 2.0;

   /* UMUL: the scaled mean is truncated to an unsigned integer. */
   const uint64_t scaled = mean * 100;
   const double scaled_pct = scaled;

   const double core_clocks = sklgt2__ff_bottlenecks__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return scaled_pct / core_clocks;
}

/* FF Bottlenecks :: TE Bottleneck
 *
 * Averages counters B2 and B6, scales the mean by 100 (truncating to an
 * unsigned integer) and divides by the set's GPU core clocks reader.
 * Returns 0 when the clock count is 0.
 */
static float
sklgt2__ff_bottlenecks__te_bottleneck__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: B 2 READ B 6 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   /* The two samples are added as integers and only then widened. */
   const double sum = b2 + b6;
   const double mean = sum / 2.0;

   /* UMUL: the scaled mean is truncated to an unsigned integer. */
   const uint64_t scaled = mean * 100;
   const double scaled_pct = scaled;

   const double core_clocks = sklgt2__ff_bottlenecks__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return scaled_pct / core_clocks;
}

/* FF Bottlenecks :: GS Bottleneck
 *
 * Macro alias onto a bdw__ reader; no function is emitted for this counter.
 * NOTE(review): the target is named early_depth_bottleneck; presumably
 * chosen because its equation matches -- confirm against gen_perf.py.
 */
#define sklgt2__ff_bottlenecks__gs_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/*
 * MDAPI testing set (sklgt2): read callbacks.
 *
 * Every counter in this set is a macro alias: the sklgt2 name forwards to a
 * reader that carries a bdw__ or hsw__ name, so no new function is emitted.
 * NOTE(review): TestCounter0..8 forward to hsw compute_extended /
 * memory_reads readers; presumably the generator aliases readers whose
 * equations are identical -- confirm against gen_perf.py before treating a
 * name mismatch as a bug.
 */

/* MDAPI testing set :: GPU Time Elapsed */
#define sklgt2__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* MDAPI testing set :: GPU Core Clocks */
#define sklgt2__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing set :: AVG GPU Core Frequency */
#define sklgt2__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing set :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing set :: TestCounter0 */
#define sklgt2__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing set :: TestCounter1 */
#define sklgt2__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing set :: TestCounter2 */
#define sklgt2__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing set :: TestCounter3 */
#define sklgt2__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing set :: TestCounter4 */
#define sklgt2__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing set :: TestCounter5 */
#define sklgt2__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing set :: TestCounter6 */
#define sklgt2__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing set :: TestCounter7 */
#define sklgt2__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* MDAPI testing set :: TestCounter8 */
#define sklgt2__test_oa__counter8__read \
   hsw__memory_reads__gpu_core_clocks__read

/*
 * PMA Stall metric set (sklgt2): counters that are plain macro aliases.
 * Each sklgt2 name forwards to a reader that carries a bdw__ or hsw__ name,
 * so no new function is emitted for it.
 */

/* Metric set PMA Stall :: GPU Time Elapsed */
#define sklgt2__pma__stall__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set PMA Stall :: GPU Core Clocks */
#define sklgt2__pma__stall__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set PMA Stall :: AVG GPU Core Frequency */
#define sklgt2__pma__stall__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set PMA Stall :: AVG GPU Core Frequency (__max companion of the reader above) */
#define sklgt2__pma__stall__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/*
 * Metric set PMA Stall :: STC PMA stall
 *
 * Averages the accumulated B counters at b_offset + 0 and b_offset + 1,
 * multiplies the average by 100 (keeping only the integer part) and divides
 * the result by the GPU core clock count of this metric set.  Yields 0 when
 * the core clock count is 0.
 */
static float
sklgt2__pma__stall__stc_pma_stall__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ B 1 READ FADD 2 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t stall_b0 = results->accumulator[query->b_offset + 0];
   const uint64_t stall_b1 = results->accumulator[query->b_offset + 1];

   /* The sum is formed in 64-bit integer arithmetic, then widened to double. */
   const double stall_sum = stall_b0 + stall_b1;
   const double stall_avg = stall_sum / 2.0;

   /* UMUL: storing into a uint64_t drops the fractional part of the product. */
   const uint64_t scaled_avg = stall_avg * 100;
   const double numerator = scaled_avg;

   const double core_clocks =
      sklgt2__pma__stall__gpu_core_clocks__read(perf, query, results);

   /* Guard the final FDIV against a zero clock count. */
   double ratio = 0;
   if (core_clocks)
      ratio = numerator / core_clocks;

   return ratio;
}

/*
 * sklgt2 "AsyncCompute" counter readers.
 *
 * Every reader in this group is a preprocessor alias for a function named
 * after another metric set (hsw/bdw render_basic, bdw compute_l3_cache,
 * sklgt2 ff_bottlenecks).  Several alias targets are named after a different
 * counter than the one they serve, e.g. gs_threads maps to
 * hsw__render_basic__vs_threads__read and fpu0_active maps to
 * bdw__render_basic__eu_fpu_both_active__read.
 * NOTE(review): presumably the generator reuses any function with an
 * identical equation regardless of its name - confirm against gen_perf.py
 * before treating such a mismatch as a bug.
 */

/* AsyncCompute :: GPU Time Elapsed */
#define sklgt2__async_compute__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* AsyncCompute :: GPU Core Clocks */
#define sklgt2__async_compute__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* AsyncCompute :: AVG GPU Core Frequency */
#define sklgt2__async_compute__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* AsyncCompute :: AVG GPU Core Frequency
 * (__max companion of the __read alias above)
 */
#define sklgt2__async_compute__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* AsyncCompute :: GPU Busy */
#define sklgt2__async_compute__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* AsyncCompute :: VS Threads Dispatched */
#define sklgt2__async_compute__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* AsyncCompute :: HS Threads Dispatched */
#define sklgt2__async_compute__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* AsyncCompute :: DS Threads Dispatched */
#define sklgt2__async_compute__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* AsyncCompute :: GS Threads Dispatched */
#define sklgt2__async_compute__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* AsyncCompute :: FS Threads Dispatched */
#define sklgt2__async_compute__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* AsyncCompute :: CS Threads Dispatched */
#define sklgt2__async_compute__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* AsyncCompute :: EU FPU0 Pipe Active */
#define sklgt2__async_compute__fpu0_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* AsyncCompute :: VS FPU0 Pipe Active */
#define sklgt2__async_compute__vs_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* AsyncCompute :: PS FPU0 Pipe Active */
#define sklgt2__async_compute__ps_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* AsyncCompute :: CS FPU0 Pipe Active */
#define sklgt2__async_compute__cs_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* AsyncCompute :: EU FPU1 Pipe Active */
#define sklgt2__async_compute__fpu1_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* AsyncCompute :: VS FPU1 Pipe Active */
#define sklgt2__async_compute__vs_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* AsyncCompute :: PS FPU1 Pipe Active */
#define sklgt2__async_compute__ps_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* AsyncCompute :: CS FPU1 Pipe Active */
#define sklgt2__async_compute__cs_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* AsyncCompute :: EU Thread Occupancy */
#define sklgt2__async_compute__eu_thread_occupancy__read \
   sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* AsyncCompute :: EU Active */
#define sklgt2__async_compute__eu_active__read \
   bdw__render_basic__eu_active__read

/* AsyncCompute :: EU Stall */
#define sklgt2__async_compute__eu_stall__read \
   bdw__render_basic__eu_stall__read

/*
 * sklgt3 "Render Metrics Basic set" counter readers.
 *
 * Every reader in this group is a preprocessor alias for an hsw/bdw reader
 * function.  Most targets carry the same counter name as the alias; the
 * exceptions are gs_threads (hsw__render_basic__vs_threads__read), l3_misses
 * (hsw__compute_extended__typed_atomics0__read) and shader_barriers
 * (hsw__render_basic__early_depth_test_fails__read).
 * NOTE(review): presumably the generator reuses any function with an
 * identical equation regardless of its name - confirm against gen_perf.py
 * before treating such a mismatch as a bug.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define sklgt3__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define sklgt3__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt3__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency
 * (__max companion of the __read alias above)
 */
#define sklgt3__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define sklgt3__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define sklgt3__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define sklgt3__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define sklgt3__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define sklgt3__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define sklgt3__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define sklgt3__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define sklgt3__render_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define sklgt3__render_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt3__render_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic set :: VS FPU0 Pipe Active */
#define sklgt3__render_basic__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic set :: VS FPU1 Pipe Active */
#define sklgt3__render_basic__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic set :: VS Send Pipe Active */
#define sklgt3__render_basic__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define sklgt3__render_basic__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define sklgt3__render_basic__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define sklgt3__render_basic__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Render Metrics Basic set :: FS Both FPU Active */
#define sklgt3__render_basic__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Render Metrics Basic set :: Sampler 0 Busy */
#define sklgt3__render_basic__sampler0_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler 1 Busy */
#define sklgt3__render_basic__sampler1_busy__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define sklgt3__render_basic__samplers_busy__read \
   bdw__render_basic__samplers_busy__read

/* Render Metrics Basic set :: Sampler 0 Bottleneck */
#define sklgt3__render_basic__sampler0_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics Basic set :: Sampler 1 Bottleneck */
#define sklgt3__render_basic__sampler1_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define sklgt3__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt3__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define sklgt3__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define sklgt3__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define sklgt3__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define sklgt3__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define sklgt3__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define sklgt3__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define sklgt3__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses */
#define sklgt3__render_basic__sampler_l1_misses__read \
   bdw__render_basic__sampler_l1_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define sklgt3__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define sklgt3__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define sklgt3__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt3__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC */
#define sklgt3__render_basic__l3_lookups__read \
   bdw__render_basic__l3_lookups__read

/* Render Metrics Basic set :: L3 Misses */
#define sklgt3__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput */
#define sklgt3__render_basic__l3_sampler_throughput__read \
   bdw__render_basic__l3_sampler_throughput__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define sklgt3__render_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define sklgt3__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput */
#define sklgt3__render_basic__gti_vf_throughput__read \
   bdw__render_basic__gti_vf_throughput__read

/* Render Metrics Basic set :: GTI Depth Throughput */
#define sklgt3__render_basic__gti_depth_throughput__read \
   bdw__render_basic__gti_depth_throughput__read

/* Render Metrics Basic set :: GTI RCC Throughput */
#define sklgt3__render_basic__gti_rcc_throughput__read \
   bdw__render_basic__gti_rcc_throughput__read

/* Render Metrics Basic set :: GTI L3 Throughput */
#define sklgt3__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput */
#define sklgt3__render_basic__gti_hdc_lookups_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define sklgt3__render_basic__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define sklgt3__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define sklgt3__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler_bottleneck__read

/*
 * sklgt3 "Compute Metrics Basic set" counter readers.
 *
 * Every reader in this group is a preprocessor alias for an hsw/bdw reader
 * function (render_basic or compute_basic).  Some targets are named after a
 * different counter than the alias, e.g. fpu0_active maps to
 * bdw__render_basic__vs_fpu0_active__read, gti_read_throughput maps to
 * hsw__render_basic__gti_l3_throughput__read and gti_write_throughput maps
 * to bdw__render_basic__gti_hdc_lookups_throughput__read.
 * NOTE(review): presumably the generator reuses any function with an
 * identical equation regardless of its name - confirm against gen_perf.py
 * before treating such a mismatch as a bug.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define sklgt3__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define sklgt3__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt3__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency
 * (__max companion of the __read alias above)
 */
#define sklgt3__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define sklgt3__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define sklgt3__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define sklgt3__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define sklgt3__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define sklgt3__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define sklgt3__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define sklgt3__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define sklgt3__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define sklgt3__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt3__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define sklgt3__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define sklgt3__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU AVG IPC Rate */
#define sklgt3__compute_basic__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Basic set :: EU Send Pipe Active */
#define sklgt3__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define sklgt3__compute_basic__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define sklgt3__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt3__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define sklgt3__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define sklgt3__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define sklgt3__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define sklgt3__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define sklgt3__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define sklgt3__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define sklgt3__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define sklgt3__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define sklgt3__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define sklgt3__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt3__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define sklgt3__compute_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define sklgt3__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read */
#define sklgt3__compute_basic__typed_bytes_read__read \
   bdw__compute_basic__typed_bytes_read__read

/* Compute Metrics Basic set :: Typed Bytes Written */
#define sklgt3__compute_basic__typed_bytes_written__read \
   bdw__compute_basic__typed_bytes_written__read

/* Compute Metrics Basic set :: Untyped Bytes Read */
#define sklgt3__compute_basic__untyped_bytes_read__read \
   bdw__compute_basic__untyped_bytes_read__read

/* Compute Metrics Basic set :: Untyped Writes */
#define sklgt3__compute_basic__untyped_bytes_written__read \
   bdw__compute_basic__untyped_bytes_written__read

/* Compute Metrics Basic set :: GTI Read Throughput */
#define sklgt3__compute_basic__gti_read_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput */
#define sklgt3__compute_basic__gti_write_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * sklgt3 "Render Metrics set for 3D Pipeline Profile" counter readers.
 *
 * Every reader in this group is a preprocessor alias for an hsw/bdw reader
 * function (render_basic or render_pipe_profile).  Some targets are named
 * after a different counter than the alias: vs/hs/ds/gs_bottleneck map to
 * the bdw render_basic sampler0_busy, sampler1_busy, sampler0_bottleneck and
 * sampler1_bottleneck readers, and shader_barriers maps to
 * hsw__render_basic__early_depth_test_fails__read.
 * NOTE(review): presumably the generator reuses any function with an
 * identical equation regardless of its name - confirm against gen_perf.py
 * before treating such a mismatch as a bug.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define sklgt3__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define sklgt3__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define sklgt3__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency
 * (__max companion of the __read alias above)
 */
#define sklgt3__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define sklgt3__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define sklgt3__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define sklgt3__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define sklgt3__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define sklgt3__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define sklgt3__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define sklgt3__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define sklgt3__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define sklgt3__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define sklgt3__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define sklgt3__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define sklgt3__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define sklgt3__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define sklgt3__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define sklgt3__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define sklgt3__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define sklgt3__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define sklgt3__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define sklgt3__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define sklgt3__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define sklgt3__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define sklgt3__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define sklgt3__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define sklgt3__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define sklgt3__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define sklgt3__render_pipe_profile__vs_bottleneck__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define sklgt3__render_pipe_profile__hs_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define sklgt3__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define sklgt3__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define sklgt3__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define sklgt3__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define sklgt3__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define sklgt3__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define sklgt3__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define sklgt3__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define sklgt3__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define sklgt3__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define sklgt3__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define sklgt3__render_pipe_profile__cl_stall__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define sklgt3__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_stall__read

/*
 * sklgt3 "Memory Reads Distribution metrics set" counter readers.
 *
 * Every reader in this group is a preprocessor alias for an hsw/bdw reader
 * function.  Of the Gti* counters only gti_l3_reads and gti_ring_accesses
 * alias a target with the same counter name; the others map to functions
 * named after hsw compute_extended / memory_reads counters (e.g.
 * gti_rcz_memory_reads maps to hsw__compute_extended__gpu_clocks__read).
 * NOTE(review): presumably the generator reuses any function with an
 * identical equation regardless of its name - confirm against gen_perf.py
 * before treating such a mismatch as a bug.
 */

/* Memory Reads Distribution metrics set :: GPU Time Elapsed */
#define sklgt3__memory_reads__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Reads Distribution metrics set :: GPU Core Clocks */
#define sklgt3__memory_reads__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt3__memory_reads__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency
 * (__max companion of the __read alias above)
 */
#define sklgt3__memory_reads__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Reads Distribution metrics set :: GPU Busy */
#define sklgt3__memory_reads__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Reads Distribution metrics set :: VS Threads Dispatched */
#define sklgt3__memory_reads__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: HS Threads Dispatched */
#define sklgt3__memory_reads__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Reads Distribution metrics set :: DS Threads Dispatched */
#define sklgt3__memory_reads__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Reads Distribution metrics set :: GS Threads Dispatched */
#define sklgt3__memory_reads__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: FS Threads Dispatched */
#define sklgt3__memory_reads__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Reads Distribution metrics set :: CS Threads Dispatched */
#define sklgt3__memory_reads__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Reads Distribution metrics set :: EU Active */
#define sklgt3__memory_reads__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Reads Distribution metrics set :: EU Stall */
#define sklgt3__memory_reads__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Reads Distribution metrics set :: Rasterized Pixels */
#define sklgt3__memory_reads__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Reads Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt3__memory_reads__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Early Depth Test Fails */
#define sklgt3__memory_reads__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Samples Killed in FS */
#define sklgt3__memory_reads__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Reads Distribution metrics set :: Pixels Failing Tests */
#define sklgt3__memory_reads__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Reads Distribution metrics set :: Samples Written */
#define sklgt3__memory_reads__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Reads Distribution metrics set :: Samples Blended */
#define sklgt3__memory_reads__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Reads Distribution metrics set :: Sampler Texels */
#define sklgt3__memory_reads__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Reads Distribution metrics set :: Sampler Texels Misses */
#define sklgt3__memory_reads__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Reads Distribution metrics set :: SLM Bytes Read */
#define sklgt3__memory_reads__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Reads Distribution metrics set :: SLM Bytes Written */
#define sklgt3__memory_reads__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Reads Distribution metrics set :: Shader Memory Accesses */
#define sklgt3__memory_reads__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Reads Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt3__memory_reads__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Reads Distribution metrics set :: Shader Barrier Messages */
#define sklgt3__memory_reads__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: GtiCmdStreamerMemoryReads */
#define sklgt3__memory_reads__gti_cmd_streamer_memory_reads__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Reads Distribution metrics set :: GtiRsMemoryReads */
#define sklgt3__memory_reads__gti_rs_memory_reads__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Reads Distribution metrics set :: GtiVfMemoryReads */
#define sklgt3__memory_reads__gti_vf_memory_reads__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Memory Reads Distribution metrics set :: GtiRccMemoryReads */
#define sklgt3__memory_reads__gti_rcc_memory_reads__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiMscMemoryReads */
#define sklgt3__memory_reads__gti_msc_memory_reads__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Reads Distribution metrics set :: GtiHizMemoryReads */
#define sklgt3__memory_reads__gti_hiz_memory_reads__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiStcMemoryReads */
#define sklgt3__memory_reads__gti_stc_memory_reads__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Reads Distribution metrics set :: GtiRczMemoryReads */
#define sklgt3__memory_reads__gti_rcz_memory_reads__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Reads Distribution metrics set :: GtiMemoryReads */
#define sklgt3__memory_reads__gti_memory_reads__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank0Reads */
#define sklgt3__memory_reads__gti_l3_bank0_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank1Reads */
#define sklgt3__memory_reads__gti_l3_bank1_reads__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Reads Distribution metrics set :: GtiL3Bank2Reads */
#define sklgt3__memory_reads__gti_l3_bank2_reads__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Reads Distribution metrics set :: GtiL3Bank3Reads */
#define sklgt3__memory_reads__gti_l3_bank3_reads__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: GtiL3Reads */
#define sklgt3__memory_reads__gti_l3_reads__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Reads Distribution metrics set :: GtiRingAccesses */
#define sklgt3__memory_reads__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * sklgt3 "Memory Writes Distribution" accessors.
 *
 * Every macro in this group maps an sklgt3 accessor name onto a function
 * carrying an hsw/bdw prefix instead of defining a new function.
 * NOTE(review): several targets are named after an unrelated metric (e.g.
 * gti_l3_writes -> bdw__memory_reads__gti_l3_reads__read). Presumably
 * gen_perf.py reuses an already-emitted function whose equation is
 * identical -- confirm against the generator before "fixing" a target.
 */

/* Memory Writes Distribution metrics set :: GPU Time Elapsed */
#define sklgt3__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution metrics set :: GPU Core Clocks */
#define sklgt3__memory_writes__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt3__memory_writes__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency
 * (__max accessor, paired with the __read alias above)
 */
#define sklgt3__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution metrics set :: GPU Busy */
#define sklgt3__memory_writes__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Writes Distribution metrics set :: VS Threads Dispatched */
#define sklgt3__memory_writes__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: HS Threads Dispatched */
#define sklgt3__memory_writes__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Writes Distribution metrics set :: DS Threads Dispatched */
#define sklgt3__memory_writes__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Writes Distribution metrics set :: GS Threads Dispatched */
#define sklgt3__memory_writes__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: FS Threads Dispatched */
#define sklgt3__memory_writes__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Writes Distribution metrics set :: CS Threads Dispatched */
#define sklgt3__memory_writes__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Writes Distribution metrics set :: EU Active */
#define sklgt3__memory_writes__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Writes Distribution metrics set :: EU Stall */
#define sklgt3__memory_writes__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Writes Distribution metrics set :: Rasterized Pixels */
#define sklgt3__memory_writes__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Writes Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt3__memory_writes__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Early Depth Test Fails */
#define sklgt3__memory_writes__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Samples Killed in FS */
#define sklgt3__memory_writes__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution metrics set :: Pixels Failing Tests */
#define sklgt3__memory_writes__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Writes Distribution metrics set :: Samples Written */
#define sklgt3__memory_writes__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Writes Distribution metrics set :: Samples Blended */
#define sklgt3__memory_writes__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Writes Distribution metrics set :: Sampler Texels */
#define sklgt3__memory_writes__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Writes Distribution metrics set :: Sampler Texels Misses */
#define sklgt3__memory_writes__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Writes Distribution metrics set :: SLM Bytes Read */
#define sklgt3__memory_writes__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Writes Distribution metrics set :: SLM Bytes Written */
#define sklgt3__memory_writes__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Writes Distribution metrics set :: Shader Memory Accesses */
#define sklgt3__memory_writes__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Writes Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt3__memory_writes__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Writes Distribution metrics set :: L3 Shader Throughput */
#define sklgt3__memory_writes__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Writes Distribution metrics set :: Shader Barrier Messages */
#define sklgt3__memory_writes__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: GtiCmdStreamerMemoryWrites */
#define sklgt3__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metrics set :: GtiSoMemoryWrites */
#define sklgt3__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metrics set :: GtiRccMemoryWrites */
#define sklgt3__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiMscMemoryWrites */
#define sklgt3__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metrics set :: GtiHizMemoryWrites */
#define sklgt3__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiStcMemoryWrites */
#define sklgt3__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define sklgt3__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define sklgt3__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define sklgt3__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define sklgt3__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define sklgt3__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define sklgt3__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define sklgt3__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define sklgt3__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * sklgt3 "Compute Metrics Extended" accessors.
 *
 * Every macro in this group maps an sklgt3 accessor name onto a function
 * carrying an hsw/bdw prefix instead of defining a new function.
 * NOTE(review): several targets are named after an unrelated metric (e.g.
 * typed_reads0 -> hsw__render_basic__gpu_core_clocks__read). Presumably
 * gen_perf.py reuses an already-emitted function whose equation is
 * identical -- confirm against the generator before "fixing" a target.
 */

/* Compute Metrics Extended set :: GPU Time Elapsed */
#define sklgt3__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks */
#define sklgt3__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency */
#define sklgt3__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency
 * (__max accessor, paired with the __read alias above)
 */
#define sklgt3__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended set :: CS Threads Dispatched */
#define sklgt3__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EU Active */
#define sklgt3__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended set :: EU Stall */
#define sklgt3__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended set :: EU Both FPU Pipes Active */
#define sklgt3__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended set :: EU FPU0 Pipe Active */
#define sklgt3__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended set :: EU FPU1 Pipe Active */
#define sklgt3__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended set :: EU AVG IPC Rate */
#define sklgt3__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended set :: EU Send Pipe Active */
#define sklgt3__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended set :: EU Thread Occupancy */
#define sklgt3__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended set :: Sampler Texels */
#define sklgt3__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended set :: Sampler Texels Misses */
#define sklgt3__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended set :: SLM Bytes Read */
#define sklgt3__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended set :: SLM Bytes Written */
#define sklgt3__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended set :: Shader Memory Accesses */
#define sklgt3__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended set :: Shader Atomic Memory Accesses */
#define sklgt3__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended set :: L3 Shader Throughput */
#define sklgt3__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended set :: Shader Barrier Messages */
#define sklgt3__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended set :: EuUntypedReads0 */
#define sklgt3__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended set :: EuTypedReads0 */
#define sklgt3__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended set :: EuUntypedWrites0 */
#define sklgt3__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended set :: EuTypedWrites0 */
#define sklgt3__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended set :: EuUntypedAtomics0 */
#define sklgt3__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended set :: EuTypedAtomics0 */
#define sklgt3__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedReads0 */
#define sklgt3__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedWrites0 */
#define sklgt3__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended set :: Typed Reads 0 */
#define sklgt3__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0 */
#define sklgt3__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended set :: Untyped Reads 0 */
#define sklgt3__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended set :: Untyped Writes 0 */
#define sklgt3__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended set :: Typed Atomics 0 */
#define sklgt3__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended set :: TypedReadsPerCacheLine */
#define sklgt3__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended set :: TypedWritesPerCacheLine */
#define sklgt3__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine */
#define sklgt3__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine */
#define sklgt3__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine */
#define sklgt3__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * sklgt3 "Compute Metrics L3 Cache" accessors.
 *
 * Every macro in this group maps an sklgt3 accessor name onto a function
 * carrying an hsw/bdw/chv/sklgt2 prefix instead of defining a new function.
 * NOTE(review): several targets are named after an unrelated metric (e.g.
 * eu_ternary_fpu0_instruction -> bdw__render_basic__ps_fpu0_active__read).
 * Presumably gen_perf.py reuses an already-emitted function whose equation
 * is identical -- confirm against the generator before "fixing" a target.
 */

/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define sklgt3__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define sklgt3__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency */
#define sklgt3__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency
 * (__max accessor, paired with the __read alias above)
 */
#define sklgt3__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define sklgt3__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define sklgt3__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define sklgt3__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define sklgt3__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define sklgt3__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define sklgt3__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define sklgt3__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define sklgt3__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define sklgt3__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define sklgt3__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define sklgt3__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define sklgt3__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define sklgt3__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define sklgt3__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define sklgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define sklgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define sklgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define sklgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define sklgt3__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define sklgt3__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define sklgt3__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define sklgt3__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define sklgt3__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define sklgt3__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define sklgt3__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define sklgt3__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define sklgt3__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define sklgt3__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define sklgt3__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define sklgt3__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define sklgt3__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define sklgt3__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define sklgt3__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define sklgt3__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define sklgt3__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define sklgt3__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define sklgt3__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define sklgt3__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define sklgt3__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define sklgt3__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define sklgt3__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define sklgt3__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define sklgt3__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define sklgt3__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define sklgt3__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define sklgt3__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define sklgt3__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define sklgt3__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define sklgt3__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define sklgt3__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define sklgt3__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * sklgt3 "HDCAndSF" accessors.
 *
 * Every macro in this group maps an sklgt3 accessor name onto a function
 * carrying an hsw/bdw prefix instead of defining a new function.
 * NOTE(review): several targets are named after an unrelated metric (e.g.
 * poly_data_ready -> bdw__render_basic__sampler0_busy__read). Presumably
 * gen_perf.py reuses an already-emitted function whose equation is
 * identical -- confirm against the generator before "fixing" a target.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define sklgt3__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define sklgt3__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define sklgt3__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency
 * (__max accessor, paired with the __read alias above)
 */
#define sklgt3__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define sklgt3__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define sklgt3__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define sklgt3__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define sklgt3__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define sklgt3__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define sklgt3__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define sklgt3__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define sklgt3__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define sklgt3__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define sklgt3__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define sklgt3__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define sklgt3__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define sklgt3__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define sklgt3__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define sklgt3__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define sklgt3__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define sklgt3__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define sklgt3__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define sklgt3__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define sklgt3__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define sklgt3__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define sklgt3__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define sklgt3__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define sklgt3__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define sklgt3__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define sklgt3__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define sklgt3__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define sklgt3__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define sklgt3__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define sklgt3__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define sklgt3__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define sklgt3__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define sklgt3__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/*
 * NOTE(review): in the three "HDC stalled by L3" aliases below, the sklgt3
 * shader00 accessor targets the bdw shader02 function and the sklgt3
 * shader02 accessor targets the bdw shader00 function, while shader01 maps
 * to shader01. Presumably the generator matched identical equations and the
 * counter ordering differs between the platforms -- confirm against the
 * generator input before changing either target.
 */

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define sklgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define sklgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define sklgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define sklgt3__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "L3_1" accessors.
 *
 * Every macro in this group maps an sklgt3 accessor name onto a function
 * carrying an hsw/bdw prefix instead of defining a new function.
 * NOTE(review): a few targets are named after a different metric (e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read). Presumably
 * gen_perf.py reuses an already-emitted function whose equation is
 * identical -- confirm against the generator before "fixing" a target.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define sklgt3__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define sklgt3__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define sklgt3__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency
 * (__max accessor, paired with the __read alias above)
 */
#define sklgt3__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define sklgt3__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define sklgt3__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define sklgt3__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define sklgt3__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define sklgt3__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define sklgt3__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define sklgt3__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define sklgt3__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define sklgt3__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define sklgt3__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define sklgt3__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define sklgt3__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define sklgt3__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define sklgt3__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define sklgt3__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define sklgt3__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define sklgt3__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define sklgt3__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define sklgt3__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define sklgt3__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define sklgt3__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define sklgt3__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define sklgt3__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define sklgt3__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define sklgt3__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define sklgt3__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define sklgt3__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define sklgt3__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define sklgt3__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define sklgt3__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define sklgt3__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define sklgt3__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define sklgt3__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define sklgt3__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define sklgt3__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define sklgt3__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define sklgt3__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "L3_2" metric set: accessor aliases.
 *
 * Every macro in this group maps a sklgt3__l3_2__* name onto an existing
 * bdw__* or hsw__* identifier; none of them introduces a new definition.
 * Some targets carry the name of a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or
 * l30_bank2_stalled -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * equation is identical -- confirm against gen_perf.py before "fixing" a
 * name mismatch by hand.
 */

/* Metric set L3_2 :: GPU Time Elapsed */
#define sklgt3__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define sklgt3__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency */
#define sklgt3__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt3__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define sklgt3__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define sklgt3__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define sklgt3__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define sklgt3__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define sklgt3__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define sklgt3__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define sklgt3__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define sklgt3__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define sklgt3__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define sklgt3__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define sklgt3__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define sklgt3__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define sklgt3__l3_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define sklgt3__l3_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define sklgt3__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define sklgt3__l3_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define sklgt3__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define sklgt3__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define sklgt3__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define sklgt3__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define sklgt3__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define sklgt3__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define sklgt3__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define sklgt3__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define sklgt3__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define sklgt3__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define sklgt3__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define sklgt3__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define sklgt3__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define sklgt3__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define sklgt3__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define sklgt3__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Stalled */
#define sklgt3__l3_2__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Active */
#define sklgt3__l3_2__l30_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define sklgt3__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "L3_3" metric set: accessor aliases.
 *
 * Every macro in this group maps a sklgt3__l3_3__* name onto an existing
 * bdw__* or hsw__* identifier; none of them introduces a new definition.
 * Some targets carry the name of a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or
 * l30_bank3_stalled -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * equation is identical -- confirm against gen_perf.py before "fixing" a
 * name mismatch by hand.
 */

/* Metric set L3_3 :: GPU Time Elapsed */
#define sklgt3__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_3 :: GPU Core Clocks */
#define sklgt3__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_3 :: AVG GPU Core Frequency */
#define sklgt3__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_3 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt3__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_3 :: GPU Busy */
#define sklgt3__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_3 :: VS Threads Dispatched */
#define sklgt3__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_3 :: HS Threads Dispatched */
#define sklgt3__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_3 :: DS Threads Dispatched */
#define sklgt3__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_3 :: GS Threads Dispatched */
#define sklgt3__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_3 :: FS Threads Dispatched */
#define sklgt3__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_3 :: CS Threads Dispatched */
#define sklgt3__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_3 :: EU Active */
#define sklgt3__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_3 :: EU Stall */
#define sklgt3__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_3 :: EU Both FPU Pipes Active */
#define sklgt3__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_3 :: VS FPU0 Pipe Active */
#define sklgt3__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_3 :: VS FPU1 Pipe Active */
#define sklgt3__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_3 :: VS Send Pipe Active */
#define sklgt3__l3_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_3 :: PS FPU0 Pipe Active */
#define sklgt3__l3_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_3 :: PS FPU1 Pipe Active */
#define sklgt3__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_3 :: PS Send Pipeline Active */
#define sklgt3__l3_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_3 :: FS Both FPU Active */
#define sklgt3__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_3 :: Rasterized Pixels */
#define sklgt3__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_3 :: Early Hi-Depth Test Fails */
#define sklgt3__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_3 :: Early Depth Test Fails */
#define sklgt3__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Samples Killed in FS */
#define sklgt3__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_3 :: Pixels Failing Tests */
#define sklgt3__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_3 :: Samples Written */
#define sklgt3__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_3 :: Samples Blended */
#define sklgt3__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_3 :: Sampler Texels */
#define sklgt3__l3_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_3 :: Sampler Texels Misses */
#define sklgt3__l3_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_3 :: SLM Bytes Read */
#define sklgt3__l3_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_3 :: SLM Bytes Written */
#define sklgt3__l3_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_3 :: Shader Memory Accesses */
#define sklgt3__l3_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_3 :: Shader Atomic Memory Accesses */
#define sklgt3__l3_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_3 :: L3 Shader Throughput */
#define sklgt3__l3_3__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_3 :: Shader Barrier Messages */
#define sklgt3__l3_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled */
#define sklgt3__l3_3__l30_bank3_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Active */
#define sklgt3__l3_3__l30_bank3_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_3 :: SQ is full */
#define sklgt3__l3_3__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "RasterizerAndPixelBackend" metric set: accessor aliases.
 *
 * Every macro in this group maps a sklgt3__rasterizer_and_pixel_backend__*
 * name onto an existing bdw__* or hsw__* identifier; none of them
 * introduces a new definition.
 * Some targets carry the name of a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or
 * pixel_data0_ready -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * equation is identical -- confirm against gen_perf.py before "fixing" a
 * name mismatch by hand.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define sklgt3__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define sklgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define sklgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max accessor) */
#define sklgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define sklgt3__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define sklgt3__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define sklgt3__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define sklgt3__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define sklgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define sklgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define sklgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define sklgt3__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define sklgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define sklgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define sklgt3__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define sklgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define sklgt3__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define sklgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define sklgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define sklgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define sklgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define sklgt3__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define sklgt3__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define sklgt3__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define sklgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define sklgt3__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define sklgt3__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define sklgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define sklgt3__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define sklgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define sklgt3__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define sklgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define sklgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define sklgt3__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define sklgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define sklgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define sklgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "Sampler" metric set: accessor aliases.
 *
 * Every macro in this group maps a sklgt3__sampler__* name onto an existing
 * bdw__* or hsw__* identifier; none of them introduces a new definition.
 * Some targets carry the name of a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or
 * sampler01_input_available -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * equation is identical -- confirm against gen_perf.py before "fixing" a
 * name mismatch by hand.
 */

/* Metric set Sampler :: GPU Time Elapsed */
#define sklgt3__sampler__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler :: GPU Core Clocks */
#define sklgt3__sampler__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler :: AVG GPU Core Frequency */
#define sklgt3__sampler__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler :: AVG GPU Core Frequency (__max accessor) */
#define sklgt3__sampler__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler :: GPU Busy */
#define sklgt3__sampler__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler :: VS Threads Dispatched */
#define sklgt3__sampler__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler :: HS Threads Dispatched */
#define sklgt3__sampler__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler :: DS Threads Dispatched */
#define sklgt3__sampler__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler :: GS Threads Dispatched */
#define sklgt3__sampler__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler :: FS Threads Dispatched */
#define sklgt3__sampler__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler :: CS Threads Dispatched */
#define sklgt3__sampler__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler :: EU Active */
#define sklgt3__sampler__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler :: EU Stall */
#define sklgt3__sampler__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler :: EU Both FPU Pipes Active */
#define sklgt3__sampler__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler :: VS FPU0 Pipe Active */
#define sklgt3__sampler__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler :: VS FPU1 Pipe Active */
#define sklgt3__sampler__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler :: VS Send Pipe Active */
#define sklgt3__sampler__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler :: PS FPU0 Pipe Active */
#define sklgt3__sampler__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler :: PS FPU1 Pipe Active */
#define sklgt3__sampler__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler :: PS Send Pipeline Active */
#define sklgt3__sampler__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler :: FS Both FPU Active */
#define sklgt3__sampler__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler :: Rasterized Pixels */
#define sklgt3__sampler__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler :: Early Hi-Depth Test Fails */
#define sklgt3__sampler__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler :: Early Depth Test Fails */
#define sklgt3__sampler__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Samples Killed in FS */
#define sklgt3__sampler__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler :: Pixels Failing Tests */
#define sklgt3__sampler__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler :: Samples Written */
#define sklgt3__sampler__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler :: Samples Blended */
#define sklgt3__sampler__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler :: Sampler Texels */
#define sklgt3__sampler__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler :: Sampler Texels Misses */
#define sklgt3__sampler__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler :: SLM Bytes Read */
#define sklgt3__sampler__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler :: SLM Bytes Written */
#define sklgt3__sampler__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler :: Shader Memory Accesses */
#define sklgt3__sampler__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler :: Shader Atomic Memory Accesses */
#define sklgt3__sampler__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler :: L3 Shader Throughput */
#define sklgt3__sampler__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set Sampler :: Shader Barrier Messages */
#define sklgt3__sampler__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Slice0 Subslice1 Input Available */
#define sklgt3__sampler__sampler01_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set Sampler :: Slice0 Subslice2 Input Available */
#define sklgt3__sampler__sampler02_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler :: Slice0 Subslice0 Input Available */
#define sklgt3__sampler__sampler00_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice2 Sampler Output Ready */
#define sklgt3__sampler__sampler02_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice0 Sampler Output Ready */
#define sklgt3__sampler__sampler00_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice1 Sampler Output Ready */
#define sklgt3__sampler__sampler01_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler :: SQ is full */
#define sklgt3__sampler__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt3 "TDL_1" metric set: accessor aliases.
 *
 * Every macro in this group maps a sklgt3__tdl_1__* name onto an existing
 * bdw__* or hsw__* identifier; none of them introduces a new definition.
 * Some targets carry the name of a different metric (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or
 * non_ps_thread01_ready_for_dispatch -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * equation is identical -- confirm against gen_perf.py before "fixing" a
 * name mismatch by hand.
 */

/* Metric set TDL_1 :: GPU Time Elapsed */
#define sklgt3__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_1 :: GPU Core Clocks */
#define sklgt3__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_1 :: AVG GPU Core Frequency */
#define sklgt3__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_1 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt3__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_1 :: GPU Busy */
#define sklgt3__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_1 :: VS Threads Dispatched */
#define sklgt3__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_1 :: HS Threads Dispatched */
#define sklgt3__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_1 :: DS Threads Dispatched */
#define sklgt3__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_1 :: GS Threads Dispatched */
#define sklgt3__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_1 :: FS Threads Dispatched */
#define sklgt3__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_1 :: CS Threads Dispatched */
#define sklgt3__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_1 :: EU Active */
#define sklgt3__tdl_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_1 :: EU Stall */
#define sklgt3__tdl_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_1 :: EU Both FPU Pipes Active */
#define sklgt3__tdl_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_1 :: VS FPU0 Pipe Active */
#define sklgt3__tdl_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_1 :: VS FPU1 Pipe Active */
#define sklgt3__tdl_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_1 :: VS Send Pipe Active */
#define sklgt3__tdl_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define sklgt3__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define sklgt3__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define sklgt3__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define sklgt3__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: Rasterized Pixels */
#define sklgt3__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define sklgt3__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define sklgt3__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define sklgt3__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define sklgt3__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define sklgt3__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define sklgt3__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define sklgt3__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define sklgt3__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define sklgt3__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define sklgt3__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define sklgt3__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define sklgt3__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define sklgt3__tdl_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define sklgt3__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt3__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt3__tdl_1__ps_thread02_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt3__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: SQ is full */
#define sklgt3__tdl_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* sklgt3 TDL_2 counter reader aliases.
 *
 * Each #define below makes a sklgt3__tdl_2__* name expand to the function
 * named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): several targets are named after a different metric (e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read, and the
 * thread_header*_ready_port* counters -> render_pipe_profile readers).
 * Presumably the generator reuses any existing reader with an identical
 * equation; confirm against gen_perf.py before changing a target by hand.
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define sklgt3__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define sklgt3__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define sklgt3__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define sklgt3__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define sklgt3__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define sklgt3__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define sklgt3__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define sklgt3__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define sklgt3__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define sklgt3__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define sklgt3__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define sklgt3__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define sklgt3__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define sklgt3__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define sklgt3__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define sklgt3__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define sklgt3__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define sklgt3__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define sklgt3__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define sklgt3__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define sklgt3__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define sklgt3__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define sklgt3__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define sklgt3__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define sklgt3__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define sklgt3__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define sklgt3__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define sklgt3__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define sklgt3__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define sklgt3__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define sklgt3__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define sklgt3__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define sklgt3__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define sklgt3__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define sklgt3__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define sklgt3__tdl_2__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define sklgt3__tdl_2__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define sklgt3__tdl_2__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define sklgt3__tdl_2__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define sklgt3__tdl_2__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define sklgt3__tdl_2__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_2 :: SQ is full */
#define sklgt3__tdl_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* sklgt3 Compute Metrics Extra counter reader aliases.
 *
 * Each #define below makes a sklgt3__compute_extra__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): fpu1_active expands to bdw__render_basic__eu_stall__read,
 * whose name describes a different metric. Presumably the generator reuses
 * any existing reader with an identical equation; confirm against
 * gen_perf.py before changing a target by hand.
 */

/* Compute Metrics Extra set :: GPU Time Elapsed */
#define sklgt3__compute_extra__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extra set :: GPU Core Clocks */
#define sklgt3__compute_extra__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency */
#define sklgt3__compute_extra__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__compute_extra__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extra set :: EU FPU1 Pipe Active */
#define sklgt3__compute_extra__fpu1_active__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extra set :: EU FPU1 Pipe Active including Ext Math */
#define sklgt3__compute_extra__fpu1_active_adjusted__read \
   bdw__compute_extra__fpu1_active_adjusted__read

/* sklgt3 Media Vme Pipe counter reader aliases.
 *
 * Each #define below makes a sklgt3__vme_pipe__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 */

/* Media Vme Pipe metrics set :: GPU Time Elapsed */
#define sklgt3__vme_pipe__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Media Vme Pipe metrics set :: GPU Core Clocks */
#define sklgt3__vme_pipe__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Media Vme Pipe metrics set :: AVG GPU Core Frequency */
#define sklgt3__vme_pipe__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Media Vme Pipe metrics set :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__vme_pipe__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Media Vme Pipe metrics set :: GPU Busy */
#define sklgt3__vme_pipe__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Media Vme Pipe metrics set :: CS Threads Dispatched */
#define sklgt3__vme_pipe__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Media Vme Pipe metrics set :: EU Active */
#define sklgt3__vme_pipe__eu_active__read \
   bdw__render_basic__eu_active__read

/* Media Vme Pipe metrics set :: EU Stall */
#define sklgt3__vme_pipe__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Media Vme Pipe metrics set :: EU Both FPU Pipes Active */
#define sklgt3__vme_pipe__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Media Vme Pipe metrics set :: EU Thread Occupancy */
#define sklgt3__vme_pipe__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* Media Vme Pipe metrics set :: VME Busy */
#define sklgt3__vme_pipe__vme_busy__read \
   bdw__vme_pipe__vme_busy__read

/* sklgt3 Gpu Rings Busyness counter reader aliases.
 *
 * Each #define below makes a sklgt3__gpu_busyness__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): every *_busy counter here expands to a reader named after
 * an unrelated render_pipe_profile / render_basic metric. Presumably the
 * generator reuses any existing reader with an identical equation; confirm
 * against gen_perf.py before changing a target by hand.
 */

/* Gpu Rings Busyness :: GPU Time Elapsed */
#define sklgt3__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness :: GPU Core Clocks */
#define sklgt3__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency */
#define sklgt3__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness :: Render Ring Busy */
#define sklgt3__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness :: Vdbox0 Ring Busy */
#define sklgt3__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness :: Vdbox1 Ring Busy */
#define sklgt3__gpu_busyness__vdbox1_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness :: Vebox Ring Busy */
#define sklgt3__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness :: Blitter Ring Busy */
#define sklgt3__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__so_stall__read

/* Gpu Rings Busyness :: AnyRingBusy */
#define sklgt3__gpu_busyness__any_ring_busy__read \
   bdw__render_basic__sampler0_busy__read

/* sklgt3 MDAPI testing set counter reader aliases.
 *
 * Each #define below makes a sklgt3__test_oa__* name expand to the function
 * named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): TestCounter0..8 expand to hsw compute_extended /
 * memory_reads readers whose names describe other metrics. Presumably the
 * generator reuses any existing reader with an identical equation; confirm
 * against gen_perf.py before changing a target by hand.
 */

/* MDAPI testing set :: GPU Time Elapsed */
#define sklgt3__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* MDAPI testing set :: GPU Core Clocks */
#define sklgt3__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing set :: AVG GPU Core Frequency */
#define sklgt3__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing set :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing set :: TestCounter0 */
#define sklgt3__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing set :: TestCounter1 */
#define sklgt3__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing set :: TestCounter2 */
#define sklgt3__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing set :: TestCounter3 */
#define sklgt3__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing set :: TestCounter4 */
#define sklgt3__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing set :: TestCounter5 */
#define sklgt3__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing set :: TestCounter6 */
#define sklgt3__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing set :: TestCounter7 */
#define sklgt3__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* MDAPI testing set :: TestCounter8 */
#define sklgt3__test_oa__counter8__read \
   hsw__memory_reads__gpu_core_clocks__read

/* sklgt3 PMA Stall counter reader aliases.
 *
 * Each #define below makes a sklgt3__pma__stall__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 */

/* Metric set PMA Stall :: GPU Time Elapsed */
#define sklgt3__pma__stall__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set PMA Stall :: GPU Core Clocks */
#define sklgt3__pma__stall__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set PMA Stall :: AVG GPU Core Frequency */
#define sklgt3__pma__stall__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set PMA Stall :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__pma__stall__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set PMA Stall :: STC PMA stall */
#define sklgt3__pma__stall__stc_pma_stall__read \
   sklgt2__pma__stall__stc_pma_stall__read

/* sklgt3 AsyncCompute counter reader aliases.
 *
 * Each #define below makes a sklgt3__async_compute__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): most of the FPU0/FPU1 counters expand to readers named
 * after a different metric (e.g. fpu0_active ->
 * bdw__render_basic__eu_fpu_both_active__read). Presumably the generator
 * reuses any existing reader with an identical equation; confirm against
 * gen_perf.py before changing a target by hand.
 */

/* AsyncCompute :: GPU Time Elapsed */
#define sklgt3__async_compute__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* AsyncCompute :: GPU Core Clocks */
#define sklgt3__async_compute__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* AsyncCompute :: AVG GPU Core Frequency */
#define sklgt3__async_compute__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* AsyncCompute :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt3__async_compute__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* AsyncCompute :: GPU Busy */
#define sklgt3__async_compute__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* AsyncCompute :: VS Threads Dispatched */
#define sklgt3__async_compute__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* AsyncCompute :: HS Threads Dispatched */
#define sklgt3__async_compute__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* AsyncCompute :: DS Threads Dispatched */
#define sklgt3__async_compute__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* AsyncCompute :: GS Threads Dispatched */
#define sklgt3__async_compute__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* AsyncCompute :: FS Threads Dispatched */
#define sklgt3__async_compute__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* AsyncCompute :: CS Threads Dispatched */
#define sklgt3__async_compute__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* AsyncCompute :: EU FPU0 Pipe Active */
#define sklgt3__async_compute__fpu0_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* AsyncCompute :: VS FPU0 Pipe Active */
#define sklgt3__async_compute__vs_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* AsyncCompute :: PS FPU0 Pipe Active */
#define sklgt3__async_compute__ps_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* AsyncCompute :: CS FPU0 Pipe Active */
#define sklgt3__async_compute__cs_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* AsyncCompute :: EU FPU1 Pipe Active */
#define sklgt3__async_compute__fpu1_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* AsyncCompute :: VS FPU1 Pipe Active */
#define sklgt3__async_compute__vs_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* AsyncCompute :: PS FPU1 Pipe Active */
#define sklgt3__async_compute__ps_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* AsyncCompute :: CS FPU1 Pipe Active */
#define sklgt3__async_compute__cs_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* AsyncCompute :: EU Thread Occupancy */
#define sklgt3__async_compute__eu_thread_occupancy__read \
   sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* AsyncCompute :: EU Active */
#define sklgt3__async_compute__eu_active__read \
   bdw__render_basic__eu_active__read

/* AsyncCompute :: EU Stall */
#define sklgt3__async_compute__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* sklgt4 Render Metrics Basic counter reader aliases (GPU Time Elapsed
 * through Sampler Texels Misses).
 *
 * Each #define below makes a sklgt4__render_basic__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): gs_threads expands to hsw__render_basic__vs_threads__read,
 * whose name describes a different metric. Presumably the generator reuses
 * any existing reader with an identical equation; confirm against
 * gen_perf.py before changing a target by hand.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define sklgt4__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define sklgt4__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt4__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (alias for the __max function) */
#define sklgt4__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define sklgt4__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define sklgt4__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define sklgt4__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define sklgt4__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define sklgt4__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define sklgt4__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define sklgt4__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define sklgt4__render_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define sklgt4__render_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt4__render_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic set :: VS FPU0 Pipe Active */
#define sklgt4__render_basic__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic set :: VS FPU1 Pipe Active */
#define sklgt4__render_basic__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic set :: VS Send Pipe Active */
#define sklgt4__render_basic__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define sklgt4__render_basic__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define sklgt4__render_basic__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define sklgt4__render_basic__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Render Metrics Basic set :: FS Both FPU Active */
#define sklgt4__render_basic__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Render Metrics Basic set :: Sampler 0 Busy */
#define sklgt4__render_basic__sampler0_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler 1 Busy */
#define sklgt4__render_basic__sampler1_busy__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define sklgt4__render_basic__samplers_busy__read \
   bdw__render_basic__samplers_busy__read

/* Render Metrics Basic set :: Sampler 0 Bottleneck */
#define sklgt4__render_basic__sampler0_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define sklgt4__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt4__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define sklgt4__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define sklgt4__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define sklgt4__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define sklgt4__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define sklgt4__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define sklgt4__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define sklgt4__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses
 *
 * Adds the accumulated values found at B-counter slots 4, 5 and 3 (each
 * indexed relative to query->b_offset) and returns that total times 8.
 * perf is not read by this function.
 */
static uint64_t
sklgt4__render_basic__sampler_l1_misses__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ B 5 READ UADD B 3 READ UADD 8 UMUL */
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   return (b4 + b5 + b3) * 8;
}

/* sklgt4 Render Metrics Basic counter reader aliases (SLM and shader memory
 * counters).
 *
 * Each #define below makes a sklgt4__render_basic__* name expand to the
 * bdw__render_basic__* reader named on its continuation line; no new
 * function is emitted here.
 */

/* Render Metrics Basic set :: SLM Bytes Read */
#define sklgt4__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define sklgt4__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define sklgt4__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt4__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC
 *
 * Returns the sum of the sklgt4 Render Basic "sampler L1 misses" and
 * "shader memory accesses" readers, both evaluated on the same
 * perf/query/results arguments.
 */
static uint64_t
sklgt4__render_basic__l3_lookups__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: $SamplerL1Misses $ShaderMemoryAccesses UADD */
   const uint64_t sampler_misses =
      sklgt4__render_basic__sampler_l1_misses__read(perf, query, results);

   return sampler_misses +
          sklgt4__render_basic__shader_memory_accesses__read(perf, query, results);
}

/* Render Metrics Basic set :: L3 Misses
 *
 * Alias only: the name expands to hsw__compute_extended__typed_atomics0__read.
 * NOTE(review): the target is named after a different metric; presumably the
 * generator reuses it because the equations match -- confirm against
 * gen_perf.py before changing it by hand.
 */
#define sklgt4__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput
 *
 * Returns the sklgt4 Render Basic "sampler L1 misses" reader's value,
 * evaluated on the same perf/query/results arguments, multiplied by 64.
 */
static uint64_t
sklgt4__render_basic__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: $SamplerL1Misses 64 UMUL */
   const uint64_t sampler_misses =
      sklgt4__render_basic__sampler_l1_misses__read(perf, query, results);

   return sampler_misses * 64;
}

/* sklgt4 Render Metrics Basic counter reader aliases (L3 shader throughput,
 * shader barriers, GTI throughput and sampler bottleneck counters).
 *
 * Each #define below makes a sklgt4__render_basic__* name expand to the
 * function named on its continuation line; no new function is emitted here.
 *
 * NOTE(review): shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read and sampler_bottleneck to
 * bdw__render_basic__sampler0_bottleneck__read, names that describe other
 * metrics. Presumably the generator reuses any existing reader with an
 * identical equation; confirm against gen_perf.py before changing a target
 * by hand.
 */

/* Render Metrics Basic set :: L3 Shader Throughput */
#define sklgt4__render_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define sklgt4__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput */
#define sklgt4__render_basic__gti_vf_throughput__read \
   bdw__render_basic__gti_vf_throughput__read

/* Render Metrics Basic set :: GTI Depth Throughput */
#define sklgt4__render_basic__gti_depth_throughput__read \
   bdw__render_basic__gti_depth_throughput__read

/* Render Metrics Basic set :: GTI RCC Throughput */
#define sklgt4__render_basic__gti_rcc_throughput__read \
   bdw__render_basic__gti_rcc_throughput__read

/* Render Metrics Basic set :: GTI L3 Throughput */
#define sklgt4__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput */
#define sklgt4__render_basic__gti_hdc_lookups_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define sklgt4__render_basic__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define sklgt4__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define sklgt4__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define sklgt4__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define sklgt4__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt4__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define sklgt4__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define sklgt4__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define sklgt4__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define sklgt4__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define sklgt4__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define sklgt4__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define sklgt4__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define sklgt4__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define sklgt4__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define sklgt4__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/*
 * sklgt4 "Compute Metrics Basic" set: counter reader aliases.
 *
 * Each macro in this group expands a sklgt4__compute_basic__* reader name to
 * a reader carrying an hsw__ or bdw__ prefix that is defined outside this
 * section; no function body is emitted here.  The target name does not
 * always match the counter being aliased (e.g. shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read, and gti_read_throughput
 * to hsw__render_basic__gti_l3_throughput__read).
 * NOTE(review): presumably gen_perf.py reuses any existing reader whose
 * equation is identical -- confirm against the generator before treating a
 * mismatched name as a bug.  This file is autogenerated; change the
 * generator or its inputs rather than editing these lines.
 */

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define sklgt4__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define sklgt4__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define sklgt4__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU AVG IPC Rate */
#define sklgt4__compute_basic__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Basic set :: EU Send Pipe Active */
#define sklgt4__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define sklgt4__compute_basic__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define sklgt4__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define sklgt4__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define sklgt4__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define sklgt4__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define sklgt4__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define sklgt4__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define sklgt4__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define sklgt4__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define sklgt4__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define sklgt4__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define sklgt4__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define sklgt4__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define sklgt4__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define sklgt4__compute_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define sklgt4__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read */
#define sklgt4__compute_basic__typed_bytes_read__read \
   bdw__compute_basic__typed_bytes_read__read

/* Compute Metrics Basic set :: Typed Bytes Written */
#define sklgt4__compute_basic__typed_bytes_written__read \
   bdw__compute_basic__typed_bytes_written__read

/* Compute Metrics Basic set :: Untyped Bytes Read */
#define sklgt4__compute_basic__untyped_bytes_read__read \
   bdw__compute_basic__untyped_bytes_read__read

/* Compute Metrics Basic set :: Untyped Writes */
#define sklgt4__compute_basic__untyped_bytes_written__read \
   bdw__compute_basic__untyped_bytes_written__read

/* Compute Metrics Basic set :: GTI Read Throughput */
#define sklgt4__compute_basic__gti_read_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput */
#define sklgt4__compute_basic__gti_write_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * sklgt4 "Render Metrics set for 3D Pipeline Profile": counter reader aliases.
 *
 * Each macro in this group expands a sklgt4__render_pipe_profile__* name to
 * a reader (or, for the one __max entry, a max callback name) carrying an
 * hsw__ or bdw__ prefix that is defined outside this section; no function
 * body is emitted here.  The target name does not always match the counter
 * being aliased (e.g. vs_bottleneck expands to
 * bdw__render_basic__sampler0_busy__read, and gs_threads to
 * hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably gen_perf.py reuses any existing reader whose
 * equation is identical -- confirm against the generator before treating a
 * mismatched name as a bug.  This file is autogenerated; change the
 * generator or its inputs rather than editing these lines.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define sklgt4__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define sklgt4__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define sklgt4__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency (__max alias) */
#define sklgt4__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define sklgt4__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define sklgt4__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define sklgt4__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define sklgt4__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define sklgt4__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define sklgt4__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define sklgt4__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define sklgt4__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define sklgt4__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define sklgt4__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define sklgt4__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define sklgt4__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define sklgt4__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define sklgt4__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define sklgt4__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define sklgt4__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define sklgt4__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define sklgt4__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define sklgt4__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define sklgt4__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define sklgt4__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define sklgt4__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define sklgt4__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define sklgt4__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define sklgt4__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define sklgt4__render_pipe_profile__vs_bottleneck__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define sklgt4__render_pipe_profile__hs_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define sklgt4__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define sklgt4__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define sklgt4__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define sklgt4__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define sklgt4__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define sklgt4__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define sklgt4__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define sklgt4__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define sklgt4__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define sklgt4__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define sklgt4__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define sklgt4__render_pipe_profile__cl_stall__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define sklgt4__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_stall__read

/*
 * sklgt4 "Memory Reads Distribution metrics set": counter reader aliases.
 *
 * Each macro in this group expands a sklgt4__memory_reads__* name to a
 * reader (or, for the one __max entry, a max callback name) carrying an
 * hsw__ or bdw__ prefix that is defined outside this section; no function
 * body is emitted here.  The target name does not always match the counter
 * being aliased (e.g. gti_rcz_memory_reads expands to
 * hsw__compute_extended__gpu_clocks__read, and gti_l3_bank3_reads to
 * hsw__memory_reads__gpu_core_clocks__read).
 * NOTE(review): presumably gen_perf.py reuses any existing reader whose
 * equation is identical -- confirm against the generator before treating a
 * mismatched name as a bug.  This file is autogenerated; change the
 * generator or its inputs rather than editing these lines.
 */

/* Memory Reads Distribution metrics set :: GPU Time Elapsed */
#define sklgt4__memory_reads__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Reads Distribution metrics set :: GPU Core Clocks */
#define sklgt4__memory_reads__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt4__memory_reads__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Reads Distribution metrics set :: AVG GPU Core Frequency (__max alias) */
#define sklgt4__memory_reads__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Reads Distribution metrics set :: GPU Busy */
#define sklgt4__memory_reads__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Reads Distribution metrics set :: VS Threads Dispatched */
#define sklgt4__memory_reads__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: HS Threads Dispatched */
#define sklgt4__memory_reads__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Reads Distribution metrics set :: DS Threads Dispatched */
#define sklgt4__memory_reads__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Reads Distribution metrics set :: GS Threads Dispatched */
#define sklgt4__memory_reads__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Reads Distribution metrics set :: FS Threads Dispatched */
#define sklgt4__memory_reads__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Reads Distribution metrics set :: CS Threads Dispatched */
#define sklgt4__memory_reads__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Reads Distribution metrics set :: EU Active */
#define sklgt4__memory_reads__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Reads Distribution metrics set :: EU Stall */
#define sklgt4__memory_reads__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Reads Distribution metrics set :: Rasterized Pixels */
#define sklgt4__memory_reads__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Reads Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt4__memory_reads__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Early Depth Test Fails */
#define sklgt4__memory_reads__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: Samples Killed in FS */
#define sklgt4__memory_reads__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Reads Distribution metrics set :: Pixels Failing Tests */
#define sklgt4__memory_reads__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Reads Distribution metrics set :: Samples Written */
#define sklgt4__memory_reads__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Reads Distribution metrics set :: Samples Blended */
#define sklgt4__memory_reads__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Reads Distribution metrics set :: Sampler Texels */
#define sklgt4__memory_reads__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Reads Distribution metrics set :: Sampler Texels Misses */
#define sklgt4__memory_reads__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Reads Distribution metrics set :: SLM Bytes Read */
#define sklgt4__memory_reads__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Reads Distribution metrics set :: SLM Bytes Written */
#define sklgt4__memory_reads__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Reads Distribution metrics set :: Shader Memory Accesses */
#define sklgt4__memory_reads__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Reads Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt4__memory_reads__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Reads Distribution metrics set :: Shader Barrier Messages */
#define sklgt4__memory_reads__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Reads Distribution metrics set :: GtiCmdStreamerMemoryReads */
#define sklgt4__memory_reads__gti_cmd_streamer_memory_reads__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Reads Distribution metrics set :: GtiRsMemoryReads */
#define sklgt4__memory_reads__gti_rs_memory_reads__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Reads Distribution metrics set :: GtiVfMemoryReads */
#define sklgt4__memory_reads__gti_vf_memory_reads__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Memory Reads Distribution metrics set :: GtiRccMemoryReads */
#define sklgt4__memory_reads__gti_rcc_memory_reads__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiMscMemoryReads */
#define sklgt4__memory_reads__gti_msc_memory_reads__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Reads Distribution metrics set :: GtiHizMemoryReads */
#define sklgt4__memory_reads__gti_hiz_memory_reads__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiStcMemoryReads */
#define sklgt4__memory_reads__gti_stc_memory_reads__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Reads Distribution metrics set :: GtiRczMemoryReads */
#define sklgt4__memory_reads__gti_rcz_memory_reads__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Reads Distribution metrics set :: GtiMemoryReads */
#define sklgt4__memory_reads__gti_memory_reads__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank0Reads */
#define sklgt4__memory_reads__gti_l3_bank0_reads__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Reads Distribution metrics set :: GtiL3Bank1Reads */
#define sklgt4__memory_reads__gti_l3_bank1_reads__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Reads Distribution metrics set :: GtiL3Bank2Reads */
#define sklgt4__memory_reads__gti_l3_bank2_reads__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Reads Distribution metrics set :: GtiL3Bank3Reads */
#define sklgt4__memory_reads__gti_l3_bank3_reads__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Reads Distribution metrics set :: GtiL3Reads */
#define sklgt4__memory_reads__gti_l3_reads__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Reads Distribution metrics set :: GtiRingAccesses */
#define sklgt4__memory_reads__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * sklgt4 "Memory Writes Distribution metrics set": counter reader aliases.
 *
 * Each macro in this group expands a sklgt4__memory_writes__* name to a
 * reader (or, for the one __max entry, a max callback name) carrying an
 * hsw__ or bdw__ prefix that is defined outside this section; no function
 * body is emitted here.  The target name does not always match the counter
 * being aliased: several write counters expand to readers named after read
 * counters (e.g. gti_l3_writes expands to
 * bdw__memory_reads__gti_l3_reads__read, and gti_l3_bank1_writes to
 * hsw__memory_reads__gti_memory_reads__read).
 * NOTE(review): presumably gen_perf.py reuses any existing reader whose
 * equation is identical -- confirm against the generator before treating a
 * mismatched name as a bug.  This file is autogenerated; change the
 * generator or its inputs rather than editing these lines.
 */

/* Memory Writes Distribution metrics set :: GPU Time Elapsed */
#define sklgt4__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution metrics set :: GPU Core Clocks */
#define sklgt4__memory_writes__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency */
#define sklgt4__memory_writes__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency (__max alias) */
#define sklgt4__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution metrics set :: GPU Busy */
#define sklgt4__memory_writes__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Writes Distribution metrics set :: VS Threads Dispatched */
#define sklgt4__memory_writes__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: HS Threads Dispatched */
#define sklgt4__memory_writes__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Writes Distribution metrics set :: DS Threads Dispatched */
#define sklgt4__memory_writes__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Writes Distribution metrics set :: GS Threads Dispatched */
#define sklgt4__memory_writes__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: FS Threads Dispatched */
#define sklgt4__memory_writes__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Writes Distribution metrics set :: CS Threads Dispatched */
#define sklgt4__memory_writes__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Writes Distribution metrics set :: EU Active */
#define sklgt4__memory_writes__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Writes Distribution metrics set :: EU Stall */
#define sklgt4__memory_writes__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Writes Distribution metrics set :: Rasterized Pixels */
#define sklgt4__memory_writes__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Writes Distribution metrics set :: Early Hi-Depth Test Fails */
#define sklgt4__memory_writes__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Early Depth Test Fails */
#define sklgt4__memory_writes__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Samples Killed in FS */
#define sklgt4__memory_writes__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution metrics set :: Pixels Failing Tests */
#define sklgt4__memory_writes__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Writes Distribution metrics set :: Samples Written */
#define sklgt4__memory_writes__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Writes Distribution metrics set :: Samples Blended */
#define sklgt4__memory_writes__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Writes Distribution metrics set :: Sampler Texels */
#define sklgt4__memory_writes__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Writes Distribution metrics set :: Sampler Texels Misses */
#define sklgt4__memory_writes__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Writes Distribution metrics set :: SLM Bytes Read */
#define sklgt4__memory_writes__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Writes Distribution metrics set :: SLM Bytes Written */
#define sklgt4__memory_writes__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Writes Distribution metrics set :: Shader Memory Accesses */
#define sklgt4__memory_writes__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Writes Distribution metrics set :: Shader Atomic Memory Accesses */
#define sklgt4__memory_writes__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Writes Distribution metrics set :: L3 Shader Throughput */
#define sklgt4__memory_writes__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Writes Distribution metrics set :: Shader Barrier Messages */
#define sklgt4__memory_writes__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: GtiCmdStreamerMemoryWrites */
#define sklgt4__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metrics set :: GtiSoMemoryWrites */
#define sklgt4__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metrics set :: GtiRccMemoryWrites */
#define sklgt4__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiMscMemoryWrites */
#define sklgt4__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metrics set :: GtiHizMemoryWrites */
#define sklgt4__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiStcMemoryWrites */
#define sklgt4__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define sklgt4__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define sklgt4__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define sklgt4__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define sklgt4__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define sklgt4__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define sklgt4__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define sklgt4__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define sklgt4__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * sklgt4 "Compute Metrics Extended" set: counter reader aliases.
 *
 * Each macro in this group expands a sklgt4__compute_extended__* name to a
 * reader (or, for the one __max entry, a max callback name) carrying an
 * hsw__ or bdw__ prefix that is defined outside this section; no function
 * body is emitted here.  The target name does not always match the counter
 * being aliased (e.g. typed_reads0 expands to
 * hsw__render_basic__gpu_core_clocks__read, and eu_a64_untyped_writes0 to
 * hsw__compute_extended__gpu_clocks__read).
 * NOTE(review): presumably gen_perf.py reuses any existing reader whose
 * equation is identical -- confirm against the generator before treating a
 * mismatched name as a bug.  This file is autogenerated; change the
 * generator or its inputs rather than editing these lines.
 */

/* Compute Metrics Extended set :: GPU Time Elapsed */
#define sklgt4__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks */
#define sklgt4__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency */
#define sklgt4__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (__max alias) */
#define sklgt4__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended set :: CS Threads Dispatched */
#define sklgt4__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EU Active */
#define sklgt4__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended set :: EU Stall */
#define sklgt4__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended set :: EU Both FPU Pipes Active */
#define sklgt4__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended set :: EU FPU0 Pipe Active */
#define sklgt4__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended set :: EU FPU1 Pipe Active */
#define sklgt4__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended set :: EU AVG IPC Rate */
#define sklgt4__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended set :: EU Send Pipe Active */
#define sklgt4__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended set :: EU Thread Occupancy */
#define sklgt4__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended set :: Sampler Texels */
#define sklgt4__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended set :: Sampler Texels Misses */
#define sklgt4__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended set :: SLM Bytes Read */
#define sklgt4__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended set :: SLM Bytes Written */
#define sklgt4__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended set :: Shader Memory Accesses */
#define sklgt4__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended set :: Shader Atomic Memory Accesses */
#define sklgt4__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended set :: L3 Shader Throughput */
#define sklgt4__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended set :: Shader Barrier Messages */
#define sklgt4__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended set :: EuUntypedReads0 */
#define sklgt4__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended set :: EuTypedReads0 */
#define sklgt4__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended set :: EuUntypedWrites0 */
#define sklgt4__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended set :: EuTypedWrites0 */
#define sklgt4__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended set :: EuUntypedAtomics0 */
#define sklgt4__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended set :: EuTypedAtomics0 */
#define sklgt4__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedReads0 */
#define sklgt4__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedWrites0 */
#define sklgt4__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended set :: Typed Reads 0 */
#define sklgt4__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0 */
#define sklgt4__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended set :: Untyped Reads 0 */
#define sklgt4__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended set :: Untyped Writes 0 */
#define sklgt4__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended set :: Typed Atomics 0 */
#define sklgt4__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended set :: TypedReadsPerCacheLine */
#define sklgt4__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended set :: TypedWritesPerCacheLine */
#define sklgt4__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine */
#define sklgt4__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine */
#define sklgt4__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine */
#define sklgt4__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * sklgt4 "Compute Metrics L3 Cache" set.
 *
 * Every macro in this group is a plain name alias: the sklgt4 accessor
 * expands to an accessor defined under another prefix (hsw, bdw, chv or
 * sklgt2). No new code is generated here.
 *
 * Several aliases point at a target that is named after a different counter:
 *   gs_threads                   -> hsw render_basic vs_threads
 *   fpu0_active / fpu1_active    -> bdw render_basic vs_fpu0_active / vs_fpu1_active
 *   eu_send_active               -> bdw render_basic vs_send_active
 *   eu_ternary_fpu0_instruction  -> bdw render_basic ps_fpu0_active
 *   eu_ternary_fpu1_instruction  -> bdw render_basic ps_fpu1_active
 *   eu_binary_fpu0_instruction   -> bdw render_basic ps_send_active
 *   eu_binary_fpu1_instruction   -> bdw render_basic ps_eu_both_fpu_active
 *   shader_barriers              -> hsw render_basic early_depth_test_fails
 * NOTE(review): presumably the generator reuses an existing function whose
 * equation is identical; confirm against gen_perf.py before "fixing" any of
 * these by hand (this file is autogenerated).
 */
/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define sklgt4__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define sklgt4__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__read accessor) */
#define sklgt4__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__max accessor) */
#define sklgt4__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define sklgt4__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define sklgt4__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define sklgt4__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define sklgt4__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define sklgt4__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define sklgt4__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define sklgt4__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define sklgt4__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define sklgt4__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define sklgt4__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define sklgt4__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define sklgt4__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define sklgt4__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define sklgt4__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define sklgt4__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define sklgt4__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define sklgt4__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define sklgt4__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define sklgt4__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define sklgt4__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define sklgt4__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define sklgt4__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define sklgt4__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define sklgt4__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define sklgt4__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define sklgt4__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define sklgt4__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define sklgt4__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define sklgt4__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define sklgt4__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define sklgt4__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define sklgt4__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define sklgt4__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define sklgt4__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define sklgt4__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define sklgt4__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define sklgt4__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define sklgt4__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define sklgt4__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define sklgt4__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define sklgt4__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define sklgt4__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define sklgt4__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define sklgt4__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define sklgt4__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define sklgt4__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define sklgt4__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define sklgt4__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define sklgt4__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define sklgt4__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define sklgt4__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * sklgt4 "HDCAndSF" metric set.
 *
 * Every macro in this group is a plain name alias onto an accessor defined
 * under the hsw or bdw prefix.
 *
 * Aliases whose target is named after a different counter:
 *   gs_threads       -> hsw render_basic vs_threads
 *   shader_barriers  -> hsw render_basic early_depth_test_fails
 *   poly_data_ready  -> bdw render_basic sampler0_busy
 *   non_sampler_shader00_access_stalled_on_l3 -> bdw ...shader02...
 *   non_sampler_shader02_access_stalled_on_l3 -> bdw ...shader00...
 * NOTE(review): the shader00/shader02 targets are crossed while shader01
 * maps straight through. Presumably this reflects identical equations found
 * by the generator rather than a typo; confirm against gen_perf.py and its
 * input before changing anything (this file is autogenerated).
 */
/* Metric set HDCAndSF :: GPU Time Elapsed */
#define sklgt4__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define sklgt4__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__read accessor) */
#define sklgt4__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max accessor) */
#define sklgt4__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define sklgt4__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define sklgt4__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define sklgt4__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define sklgt4__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define sklgt4__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define sklgt4__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define sklgt4__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define sklgt4__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define sklgt4__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define sklgt4__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define sklgt4__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define sklgt4__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define sklgt4__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define sklgt4__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define sklgt4__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define sklgt4__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define sklgt4__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define sklgt4__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define sklgt4__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define sklgt4__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define sklgt4__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define sklgt4__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define sklgt4__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define sklgt4__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define sklgt4__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define sklgt4__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define sklgt4__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define sklgt4__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define sklgt4__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define sklgt4__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define sklgt4__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define sklgt4__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define sklgt4__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define sklgt4__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define sklgt4__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define sklgt4__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define sklgt4__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 "L3_1" metric set.
 *
 * Every macro in this group is a plain name alias onto an accessor defined
 * under the hsw or bdw prefix.
 *
 * Aliases whose target is named after a different counter:
 *   gs_threads          -> hsw render_basic vs_threads
 *   shader_barriers     -> hsw render_basic early_depth_test_fails
 *   l30_bank0_stalled   -> bdw render_basic sampler0_busy
 *   l30_bank1_stalled   -> bdw render_pipe_profile so_bottleneck
 *   l30_bank1_active    -> bdw render_pipe_profile bc_bottleneck
 *   l30_bank0_active    -> bdw render_pipe_profile hi_depth_bottleneck
 * NOTE(review): presumably the generator reuses an existing function whose
 * equation is identical; confirm against gen_perf.py before changing any of
 * these by hand (this file is autogenerated).
 */
/* Metric set L3_1 :: GPU Time Elapsed */
#define sklgt4__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define sklgt4__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__read accessor) */
#define sklgt4__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt4__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define sklgt4__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define sklgt4__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define sklgt4__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define sklgt4__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define sklgt4__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define sklgt4__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define sklgt4__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define sklgt4__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define sklgt4__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define sklgt4__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define sklgt4__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define sklgt4__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define sklgt4__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define sklgt4__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define sklgt4__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define sklgt4__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define sklgt4__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define sklgt4__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define sklgt4__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define sklgt4__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define sklgt4__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define sklgt4__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define sklgt4__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define sklgt4__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define sklgt4__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define sklgt4__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define sklgt4__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define sklgt4__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define sklgt4__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define sklgt4__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define sklgt4__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define sklgt4__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define sklgt4__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define sklgt4__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define sklgt4__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define sklgt4__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define sklgt4__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 "L3_2" metric set.
 *
 * Every macro in this group is a plain name alias onto an accessor defined
 * under the hsw or bdw prefix.
 *
 * Aliases whose target is named after a different counter:
 *   gs_threads          -> hsw render_basic vs_threads
 *   shader_barriers     -> hsw render_basic early_depth_test_fails
 *   l30_bank2_stalled   -> bdw render_basic sampler0_busy
 *   l30_bank2_active    -> bdw render_pipe_profile bc_bottleneck
 * NOTE(review): presumably the generator reuses an existing function whose
 * equation is identical; confirm against gen_perf.py before changing any of
 * these by hand (this file is autogenerated).
 */
/* Metric set L3_2 :: GPU Time Elapsed */
#define sklgt4__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define sklgt4__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__read accessor) */
#define sklgt4__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt4__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define sklgt4__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define sklgt4__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define sklgt4__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define sklgt4__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define sklgt4__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define sklgt4__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define sklgt4__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define sklgt4__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define sklgt4__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define sklgt4__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define sklgt4__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define sklgt4__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define sklgt4__l3_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define sklgt4__l3_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define sklgt4__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define sklgt4__l3_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define sklgt4__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define sklgt4__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define sklgt4__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define sklgt4__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define sklgt4__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define sklgt4__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define sklgt4__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define sklgt4__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define sklgt4__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define sklgt4__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define sklgt4__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define sklgt4__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define sklgt4__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define sklgt4__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define sklgt4__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define sklgt4__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Stalled */
#define sklgt4__l3_2__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Active */
#define sklgt4__l3_2__l30_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define sklgt4__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 "L3_3" metric set.
 *
 * Every macro in this group is a plain name alias onto an accessor defined
 * under the hsw or bdw prefix.
 *
 * Aliases whose target is named after a different counter:
 *   gs_threads          -> hsw render_basic vs_threads
 *   shader_barriers     -> hsw render_basic early_depth_test_fails
 *   l30_bank3_stalled   -> bdw render_basic sampler0_busy
 *   l30_bank3_active    -> bdw render_pipe_profile bc_bottleneck
 * NOTE(review): presumably the generator reuses an existing function whose
 * equation is identical; confirm against gen_perf.py before changing any of
 * these by hand (this file is autogenerated).
 */
/* Metric set L3_3 :: GPU Time Elapsed */
#define sklgt4__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_3 :: GPU Core Clocks */
#define sklgt4__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_3 :: AVG GPU Core Frequency (__read accessor) */
#define sklgt4__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_3 :: AVG GPU Core Frequency (__max accessor) */
#define sklgt4__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_3 :: GPU Busy */
#define sklgt4__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_3 :: VS Threads Dispatched */
#define sklgt4__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_3 :: HS Threads Dispatched */
#define sklgt4__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_3 :: DS Threads Dispatched */
#define sklgt4__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_3 :: GS Threads Dispatched */
#define sklgt4__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_3 :: FS Threads Dispatched */
#define sklgt4__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_3 :: CS Threads Dispatched */
#define sklgt4__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_3 :: EU Active */
#define sklgt4__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_3 :: EU Stall */
#define sklgt4__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_3 :: EU Both FPU Pipes Active */
#define sklgt4__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_3 :: VS FPU0 Pipe Active */
#define sklgt4__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_3 :: VS FPU1 Pipe Active */
#define sklgt4__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_3 :: VS Send Pipe Active */
#define sklgt4__l3_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_3 :: PS FPU0 Pipe Active */
#define sklgt4__l3_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_3 :: PS FPU1 Pipe Active */
#define sklgt4__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_3 :: PS Send Pipeline Active */
#define sklgt4__l3_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_3 :: FS Both FPU Active */
#define sklgt4__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_3 :: Rasterized Pixels */
#define sklgt4__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_3 :: Early Hi-Depth Test Fails */
#define sklgt4__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_3 :: Early Depth Test Fails */
#define sklgt4__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Samples Killed in FS */
#define sklgt4__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_3 :: Pixels Failing Tests */
#define sklgt4__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_3 :: Samples Written */
#define sklgt4__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_3 :: Samples Blended */
#define sklgt4__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_3 :: Sampler Texels */
#define sklgt4__l3_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_3 :: Sampler Texels Misses */
#define sklgt4__l3_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_3 :: SLM Bytes Read */
#define sklgt4__l3_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_3 :: SLM Bytes Written */
#define sklgt4__l3_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_3 :: Shader Memory Accesses */
#define sklgt4__l3_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_3 :: Shader Atomic Memory Accesses */
#define sklgt4__l3_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_3 :: L3 Shader Throughput */
#define sklgt4__l3_3__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_3 :: Shader Barrier Messages */
#define sklgt4__l3_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Stalled */
#define sklgt4__l3_3__l30_bank3_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_3 :: Slice0 L3 Bank3 Active */
#define sklgt4__l3_3__l30_bank3_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_3 :: SQ is full */
#define sklgt4__l3_3__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 aliases for metric set "RasterizerAndPixelBackend".
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): a few aliases target readers named after a different
 * counter (e.g. gs_threads -> hsw vs_threads); presumably the generator
 * reuses readers with identical equations -- confirm in gen_perf.py.
 */

/* RasterizerAndPixelBackend: GPU Time Elapsed */
#define sklgt4__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* RasterizerAndPixelBackend: GPU Core Clocks */
#define sklgt4__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* RasterizerAndPixelBackend: AVG GPU Core Frequency */
#define sklgt4__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* RasterizerAndPixelBackend: AVG GPU Core Frequency (__max variant) */
#define sklgt4__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* RasterizerAndPixelBackend: GPU Busy */
#define sklgt4__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* RasterizerAndPixelBackend: VS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend: HS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* RasterizerAndPixelBackend: DS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* RasterizerAndPixelBackend: GS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend: FS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* RasterizerAndPixelBackend: CS Threads Dispatched */
#define sklgt4__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* RasterizerAndPixelBackend: EU Active */
#define sklgt4__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* RasterizerAndPixelBackend: EU Stall */
#define sklgt4__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* RasterizerAndPixelBackend: EU Both FPU Pipes Active */
#define sklgt4__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* RasterizerAndPixelBackend: VS FPU0 Pipe Active */
#define sklgt4__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* RasterizerAndPixelBackend: VS FPU1 Pipe Active */
#define sklgt4__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* RasterizerAndPixelBackend: VS Send Pipe Active */
#define sklgt4__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* RasterizerAndPixelBackend: PS FPU0 Pipe Active */
#define sklgt4__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* RasterizerAndPixelBackend: PS FPU1 Pipe Active */
#define sklgt4__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* RasterizerAndPixelBackend: PS Send Pipeline Active */
#define sklgt4__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* RasterizerAndPixelBackend: FS Both FPU Active */
#define sklgt4__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* RasterizerAndPixelBackend: Rasterized Pixels */
#define sklgt4__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* RasterizerAndPixelBackend: Early Hi-Depth Test Fails */
#define sklgt4__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* RasterizerAndPixelBackend: Early Depth Test Fails */
#define sklgt4__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend: Samples Killed in FS */
#define sklgt4__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* RasterizerAndPixelBackend: Pixels Failing Tests */
#define sklgt4__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* RasterizerAndPixelBackend: Samples Written */
#define sklgt4__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* RasterizerAndPixelBackend: Samples Blended */
#define sklgt4__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* RasterizerAndPixelBackend: Sampler Texels */
#define sklgt4__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* RasterizerAndPixelBackend: Sampler Texels Misses */
#define sklgt4__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* RasterizerAndPixelBackend: SLM Bytes Read */
#define sklgt4__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* RasterizerAndPixelBackend: SLM Bytes Written */
#define sklgt4__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* RasterizerAndPixelBackend: Shader Memory Accesses */
#define sklgt4__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* RasterizerAndPixelBackend: Shader Atomic Memory Accesses */
#define sklgt4__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* RasterizerAndPixelBackend: L3 Shader Throughput */
#define sklgt4__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* RasterizerAndPixelBackend: Shader Barrier Messages */
#define sklgt4__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend: Slice0 Post-EarlyZ Pixel Data Ready */
#define sklgt4__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_busy__read

/* RasterizerAndPixelBackend: Slice0 Rasterizer Input Available */
#define sklgt4__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_basic__sampler1_busy__read

/* RasterizerAndPixelBackend: Slice0 PS Output Available */
#define sklgt4__rasterizer_and_pixel_backend__ps_output0_available__read bdw__render_pipe_profile__bc_bottleneck__read

/* RasterizerAndPixelBackend: Slice0 Pixel Values Ready */
#define sklgt4__rasterizer_and_pixel_backend__pixel_values0_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* RasterizerAndPixelBackend: Slice0 Rasterizer Output Ready */
#define sklgt4__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* RasterizerAndPixelBackend: SQ is full */
#define sklgt4__rasterizer_and_pixel_backend__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 aliases for metric set "Sampler".
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): a few aliases target readers named after a different
 * counter (e.g. gs_threads -> hsw vs_threads); presumably the generator
 * reuses readers with identical equations -- confirm in gen_perf.py.
 */

/* Sampler: GPU Time Elapsed */
#define sklgt4__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* Sampler: GPU Core Clocks */
#define sklgt4__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Sampler: AVG GPU Core Frequency */
#define sklgt4__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler: AVG GPU Core Frequency (__max variant) */
#define sklgt4__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler: GPU Busy */
#define sklgt4__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Sampler: VS Threads Dispatched */
#define sklgt4__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* Sampler: HS Threads Dispatched */
#define sklgt4__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* Sampler: DS Threads Dispatched */
#define sklgt4__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* Sampler: GS Threads Dispatched */
#define sklgt4__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* Sampler: FS Threads Dispatched */
#define sklgt4__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* Sampler: CS Threads Dispatched */
#define sklgt4__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* Sampler: EU Active */
#define sklgt4__sampler__eu_active__read bdw__render_basic__eu_active__read

/* Sampler: EU Stall */
#define sklgt4__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* Sampler: EU Both FPU Pipes Active */
#define sklgt4__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Sampler: VS FPU0 Pipe Active */
#define sklgt4__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Sampler: VS FPU1 Pipe Active */
#define sklgt4__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Sampler: VS Send Pipe Active */
#define sklgt4__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Sampler: PS FPU0 Pipe Active */
#define sklgt4__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Sampler: PS FPU1 Pipe Active */
#define sklgt4__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Sampler: PS Send Pipeline Active */
#define sklgt4__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Sampler: FS Both FPU Active */
#define sklgt4__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler: Rasterized Pixels */
#define sklgt4__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Sampler: Early Hi-Depth Test Fails */
#define sklgt4__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Sampler: Early Depth Test Fails */
#define sklgt4__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Sampler: Samples Killed in FS */
#define sklgt4__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Sampler: Pixels Failing Tests */
#define sklgt4__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Sampler: Samples Written */
#define sklgt4__sampler__samples_written__read bdw__render_basic__samples_written__read

/* Sampler: Samples Blended */
#define sklgt4__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler: Sampler Texels */
#define sklgt4__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler: Sampler Texels Misses */
#define sklgt4__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler: SLM Bytes Read */
#define sklgt4__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Sampler: SLM Bytes Written */
#define sklgt4__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Sampler: Shader Memory Accesses */
#define sklgt4__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Sampler: Shader Atomic Memory Accesses */
#define sklgt4__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Sampler: L3 Shader Throughput */
#define sklgt4__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Sampler: Shader Barrier Messages */
#define sklgt4__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Sampler: Slice0 Subslice1 Input Available */
#define sklgt4__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Sampler: Slice0 Subslice2 Input Available */
#define sklgt4__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Sampler: Slice0 Subslice0 Input Available */
#define sklgt4__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler: Slice0 Subslice2 Sampler Output Ready */
#define sklgt4__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Sampler: Slice0 Subslice0 Sampler Output Ready */
#define sklgt4__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Sampler: Slice0 Subslice1 Sampler Output Ready */
#define sklgt4__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* Sampler: SQ is full */
#define sklgt4__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 aliases for metric set "TDL_1".
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): a few aliases target readers named after a different
 * counter (e.g. gs_threads -> hsw vs_threads); presumably the generator
 * reuses readers with identical equations -- confirm in gen_perf.py.
 */

/* TDL_1: GPU Time Elapsed */
#define sklgt4__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_1: GPU Core Clocks */
#define sklgt4__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_1: AVG GPU Core Frequency */
#define sklgt4__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1: AVG GPU Core Frequency (__max variant) */
#define sklgt4__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1: GPU Busy */
#define sklgt4__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_1: VS Threads Dispatched */
#define sklgt4__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_1: HS Threads Dispatched */
#define sklgt4__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_1: DS Threads Dispatched */
#define sklgt4__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_1: GS Threads Dispatched */
#define sklgt4__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_1: FS Threads Dispatched */
#define sklgt4__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_1: CS Threads Dispatched */
#define sklgt4__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_1: EU Active */
#define sklgt4__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* TDL_1: EU Stall */
#define sklgt4__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_1: EU Both FPU Pipes Active */
#define sklgt4__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_1: VS FPU0 Pipe Active */
#define sklgt4__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_1: VS FPU1 Pipe Active */
#define sklgt4__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_1: VS Send Pipe Active */
#define sklgt4__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_1: PS FPU0 Pipe Active */
#define sklgt4__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_1: PS FPU1 Pipe Active */
#define sklgt4__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_1: PS Send Pipeline Active */
#define sklgt4__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_1: FS Both FPU Active */
#define sklgt4__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_1: Rasterized Pixels */
#define sklgt4__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_1: Early Hi-Depth Test Fails */
#define sklgt4__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_1: Early Depth Test Fails */
#define sklgt4__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_1: Samples Killed in FS */
#define sklgt4__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_1: Pixels Failing Tests */
#define sklgt4__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_1: Samples Written */
#define sklgt4__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* TDL_1: Samples Blended */
#define sklgt4__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_1: Sampler Texels */
#define sklgt4__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_1: Sampler Texels Misses */
#define sklgt4__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* TDL_1: SLM Bytes Read */
#define sklgt4__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* TDL_1: SLM Bytes Written */
#define sklgt4__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* TDL_1: Shader Memory Accesses */
#define sklgt4__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* TDL_1: Shader Atomic Memory Accesses */
#define sklgt4__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* TDL_1: L3 Shader Throughput */
#define sklgt4__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* TDL_1: Shader Barrier Messages */
#define sklgt4__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt4__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt4__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define sklgt4__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt4__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define sklgt4__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define sklgt4__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_1: SQ is full */
#define sklgt4__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 aliases for metric set "TDL_2".
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): a few aliases target readers named after a different
 * counter (e.g. gs_threads -> hsw vs_threads); presumably the generator
 * reuses readers with identical equations -- confirm in gen_perf.py.
 */

/* TDL_2: GPU Time Elapsed */
#define sklgt4__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_2: GPU Core Clocks */
#define sklgt4__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_2: AVG GPU Core Frequency */
#define sklgt4__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_2: AVG GPU Core Frequency (__max variant) */
#define sklgt4__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_2: GPU Busy */
#define sklgt4__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_2: VS Threads Dispatched */
#define sklgt4__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_2: HS Threads Dispatched */
#define sklgt4__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_2: DS Threads Dispatched */
#define sklgt4__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_2: GS Threads Dispatched */
#define sklgt4__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_2: FS Threads Dispatched */
#define sklgt4__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_2: CS Threads Dispatched */
#define sklgt4__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_2: EU Active */
#define sklgt4__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* TDL_2: EU Stall */
#define sklgt4__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_2: EU Both FPU Pipes Active */
#define sklgt4__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_2: VS FPU0 Pipe Active */
#define sklgt4__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_2: VS FPU1 Pipe Active */
#define sklgt4__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_2: VS Send Pipe Active */
#define sklgt4__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_2: PS FPU0 Pipe Active */
#define sklgt4__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_2: PS FPU1 Pipe Active */
#define sklgt4__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_2: PS Send Pipeline Active */
#define sklgt4__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_2: FS Both FPU Active */
#define sklgt4__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_2: Rasterized Pixels */
#define sklgt4__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_2: Early Hi-Depth Test Fails */
#define sklgt4__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_2: Early Depth Test Fails */
#define sklgt4__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_2: Samples Killed in FS */
#define sklgt4__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_2: Pixels Failing Tests */
#define sklgt4__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_2: Samples Written */
#define sklgt4__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* TDL_2: Samples Blended */
#define sklgt4__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_2: Sampler Texels */
#define sklgt4__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_2: Sampler Texels Misses */
#define sklgt4__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* TDL_2: SLM Bytes Read */
#define sklgt4__tdl_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* TDL_2: SLM Bytes Written */
#define sklgt4__tdl_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* TDL_2: Shader Memory Accesses */
#define sklgt4__tdl_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* TDL_2: Shader Atomic Memory Accesses */
#define sklgt4__tdl_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* TDL_2: L3 Shader Throughput */
#define sklgt4__tdl_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* TDL_2: Shader Barrier Messages */
#define sklgt4__tdl_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* TDL_2: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define sklgt4__tdl_2__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_2: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define sklgt4__tdl_2__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_2: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define sklgt4__tdl_2__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define sklgt4__tdl_2__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define sklgt4__tdl_2__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define sklgt4__tdl_2__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* TDL_2: SQ is full */
#define sklgt4__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * sklgt4 aliases for the "Compute Metrics Extra" set.
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 */

/* Compute Metrics Extra: GPU Time Elapsed */
#define sklgt4__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* Compute Metrics Extra: GPU Core Clocks */
#define sklgt4__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extra: AVG GPU Core Frequency */
#define sklgt4__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extra: AVG GPU Core Frequency (__max variant) */
#define sklgt4__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extra: EU FPU1 Pipe Active */
#define sklgt4__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/*
 * sklgt4 aliases for the "Media Vme Pipe metrics" set.
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 */

/* Media Vme Pipe: GPU Time Elapsed */
#define sklgt4__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read

/* Media Vme Pipe: GPU Core Clocks */
#define sklgt4__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Media Vme Pipe: AVG GPU Core Frequency */
#define sklgt4__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Media Vme Pipe: AVG GPU Core Frequency (__max variant) */
#define sklgt4__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Media Vme Pipe: GPU Busy */
#define sklgt4__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Media Vme Pipe: CS Threads Dispatched */
#define sklgt4__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read

/* Media Vme Pipe: EU Active */
#define sklgt4__vme_pipe__eu_active__read bdw__render_basic__eu_active__read

/* Media Vme Pipe: EU Stall */
#define sklgt4__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read

/* Media Vme Pipe: EU Both FPU Pipes Active */
#define sklgt4__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Media Vme Pipe: EU Thread Occupancy */
#define sklgt4__vme_pipe__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* Media Vme Pipe: VME Busy */
#define sklgt4__vme_pipe__vme_busy__read bdw__vme_pipe__vme_busy__read

/*
 * sklgt4 aliases for the "Gpu Rings Busyness" set.
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): the ring-busy counters alias bdw readers named after
 * unrelated counters; presumably the generator reuses readers with
 * identical equations -- confirm in gen_perf.py.
 */

/* Gpu Rings Busyness: GPU Time Elapsed */
#define sklgt4__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness: GPU Core Clocks */
#define sklgt4__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness: AVG GPU Core Frequency */
#define sklgt4__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness: AVG GPU Core Frequency (__max variant) */
#define sklgt4__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness: Render Ring Busy */
#define sklgt4__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness: Vdbox0 Ring Busy */
#define sklgt4__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness: Vdbox1 Ring Busy */
#define sklgt4__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness: Vebox Ring Busy */
#define sklgt4__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness: Blitter Ring Busy */
#define sklgt4__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__so_stall__read

/* Gpu Rings Busyness: AnyRingBusy */
#define sklgt4__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/*
 * sklgt4 aliases for the "MDAPI testing" set.
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): TestCounter0..8 alias hsw compute_extended / memory_reads
 * readers; presumably the generator reuses readers with identical
 * equations -- confirm in gen_perf.py.
 */

/* MDAPI testing: GPU Time Elapsed */
#define sklgt4__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* MDAPI testing: GPU Core Clocks */
#define sklgt4__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing: AVG GPU Core Frequency */
#define sklgt4__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing: AVG GPU Core Frequency (__max variant) */
#define sklgt4__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing: TestCounter0 */
#define sklgt4__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing: TestCounter1 */
#define sklgt4__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing: TestCounter2 */
#define sklgt4__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing: TestCounter3 */
#define sklgt4__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing: TestCounter4 */
#define sklgt4__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing: TestCounter5 */
#define sklgt4__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing: TestCounter6 */
#define sklgt4__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing: TestCounter7 */
#define sklgt4__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* MDAPI testing: TestCounter8 */
#define sklgt4__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/*
 * sklgt4 aliases for metric set "PMA Stall".
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 */

/* PMA Stall: GPU Time Elapsed */
#define sklgt4__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* PMA Stall: GPU Core Clocks */
#define sklgt4__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* PMA Stall: AVG GPU Core Frequency */
#define sklgt4__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* PMA Stall: AVG GPU Core Frequency (__max variant) */
#define sklgt4__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* PMA Stall: STC PMA stall */
#define sklgt4__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/*
 * sklgt4 aliases for the "AsyncCompute" set (first part).
 *
 * Each macro is an object-like alias that expands to the name of another
 * reader; the aliased definitions live outside this section.
 * NOTE(review): the FPU0/FPU1 counters alias bdw readers named after
 * different counters (e.g. ps_fpu0_active -> bdw vs_fpu1_active);
 * presumably the generator reuses readers with identical equations --
 * confirm in gen_perf.py.
 */

/* AsyncCompute: GPU Time Elapsed */
#define sklgt4__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* AsyncCompute: GPU Core Clocks */
#define sklgt4__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AsyncCompute: AVG GPU Core Frequency */
#define sklgt4__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AsyncCompute: AVG GPU Core Frequency (__max variant) */
#define sklgt4__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* AsyncCompute: GPU Busy */
#define sklgt4__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* AsyncCompute: VS Threads Dispatched */
#define sklgt4__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* AsyncCompute: HS Threads Dispatched */
#define sklgt4__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* AsyncCompute: DS Threads Dispatched */
#define sklgt4__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* AsyncCompute: GS Threads Dispatched */
#define sklgt4__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* AsyncCompute: FS Threads Dispatched */
#define sklgt4__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* AsyncCompute: CS Threads Dispatched */
#define sklgt4__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* AsyncCompute: EU FPU0 Pipe Active */
#define sklgt4__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* AsyncCompute: VS FPU0 Pipe Active */
#define sklgt4__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* AsyncCompute: PS FPU0 Pipe Active */
#define sklgt4__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* AsyncCompute: CS FPU0 Pipe Active */
#define sklgt4__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* AsyncCompute: EU FPU1 Pipe Active */
#define sklgt4__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* AsyncCompute: VS FPU1 Pipe Active */
#define sklgt4__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* AsyncCompute: PS FPU1 Pipe Active */
#define sklgt4__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* AsyncCompute: CS FPU1 Pipe Active */
#define sklgt4__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* AsyncCompute :: EU Thread Occupancy */
#define sklgt4__async_compute__eu_thread_occupancy__read \
   sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* AsyncCompute :: EU Active */
#define sklgt4__async_compute__eu_active__read \
   bdw__render_basic__eu_active__read

/* AsyncCompute :: EU Stall */
#define sklgt4__async_compute__eu_stall__read \
   bdw__render_basic__eu_stall__read

/*
 * "Render Metrics Basic set": kblgt2__render_basic__* aliases.
 * Each macro simply expands to the identifier on its right-hand side.
 */

/* GPU Time Elapsed */
#define kblgt2__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt2__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define kblgt2__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt2__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched */
#define kblgt2__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt2__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt2__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt2__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt2__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt2__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy */
#define kblgt2__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define kblgt2__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt2__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt2__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt2__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt2__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt2__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt2__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt2__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt2__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt2__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler 0 Busy */
#define kblgt2__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler 1 Busy */
#define kblgt2__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy */
#define kblgt2__render_basic__samplers_busy__read bdw__render_basic__samplers_busy__read

/* Sampler 0 Bottleneck */
#define kblgt2__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler 1 Bottleneck */
#define kblgt2__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* Rasterized Pixels */
#define kblgt2__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt2__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt2__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt2__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt2__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt2__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt2__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt2__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt2__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler Cache Misses */
#define kblgt2__render_basic__sampler_l1_misses__read sklgt2__render_basic__sampler_l1_misses__read

/* SLM Bytes Read */
#define kblgt2__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt2__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt2__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt2__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Lookup Accesses w/o IC */
#define kblgt2__render_basic__l3_lookups__read sklgt2__render_basic__l3_lookups__read

/* L3 Misses */
#define kblgt2__render_basic__l3_misses__read hsw__compute_extended__typed_atomics0__read

/* L3 Sampler Throughput */
#define kblgt2__render_basic__l3_sampler_throughput__read sklgt2__render_basic__l3_sampler_throughput__read

/* L3 Shader Throughput */
#define kblgt2__render_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt2__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Fixed Pipe Throughput */
#define kblgt2__render_basic__gti_vf_throughput__read bdw__render_basic__gti_vf_throughput__read

/* GTI Depth Throughput */
#define kblgt2__render_basic__gti_depth_throughput__read bdw__render_basic__gti_depth_throughput__read

/* GTI RCC Throughput */
#define kblgt2__render_basic__gti_rcc_throughput__read bdw__render_basic__gti_rcc_throughput__read

/* GTI L3 Throughput */
#define kblgt2__render_basic__gti_l3_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI HDC TLB Lookup Throughput */
#define kblgt2__render_basic__gti_hdc_lookups_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/* GTI Read Throughput */
#define kblgt2__render_basic__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define kblgt2__render_basic__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read

/* Samplers Bottleneck */
#define kblgt2__render_basic__sampler_bottleneck__read bdw__render_basic__sampler_bottleneck__read

/*
 * "Compute Metrics Basic set": kblgt2__compute_basic__* aliases.
 * Each macro simply expands to the identifier on its right-hand side.
 */

/* GPU Time Elapsed */
#define kblgt2__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt2__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define kblgt2__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt2__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt2__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt2__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt2__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt2__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt2__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt2__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt2__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt2__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt2__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt2__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define kblgt2__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define kblgt2__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define kblgt2__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define kblgt2__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define kblgt2__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define kblgt2__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt2__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt2__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt2__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt2__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt2__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt2__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt2__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt2__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt2__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt2__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt2__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt2__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt2__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt2__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Typed Bytes Read */
#define kblgt2__compute_basic__typed_bytes_read__read bdw__compute_basic__typed_bytes_read__read

/* Typed Bytes Written */
#define kblgt2__compute_basic__typed_bytes_written__read bdw__compute_basic__typed_bytes_written__read

/* Untyped Bytes Read */
#define kblgt2__compute_basic__untyped_bytes_read__read bdw__compute_basic__untyped_bytes_read__read

/* Untyped Writes */
#define kblgt2__compute_basic__untyped_bytes_written__read bdw__compute_basic__untyped_bytes_written__read

/* GTI Read Throughput */
#define kblgt2__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI Write Throughput */
#define kblgt2__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * "Render Metrics set for 3D Pipeline Profile":
 * kblgt2__render_pipe_profile__* aliases.
 * Each macro simply expands to the identifier on its right-hand side.
 */

/* GPU Time Elapsed */
#define kblgt2__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt2__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define kblgt2__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt2__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt2__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt2__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt2__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt2__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt2__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt2__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt2__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt2__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt2__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define kblgt2__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt2__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt2__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt2__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt2__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt2__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt2__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt2__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt2__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt2__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt2__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt2__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt2__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt2__render_pipe_profile__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt2__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck */
#define kblgt2__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define kblgt2__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define kblgt2__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* DS Bottleneck */
#define kblgt2__render_pipe_profile__ds_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* GS Bottleneck */
#define kblgt2__render_pipe_profile__gs_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* SO Bottleneck */
#define kblgt2__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* Clipper Bottleneck */
#define kblgt2__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* Strip-Fans Bottleneck */
#define kblgt2__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__sf_bottleneck__read

/* Hi-Depth Bottleneck */
#define kblgt2__render_pipe_profile__hi_depth_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Early Depth Bottleneck */
#define kblgt2__render_pipe_profile__early_depth_bottleneck__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* BC Bottleneck */
#define kblgt2__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* HS Stall */
#define kblgt2__render_pipe_profile__hs_stall__read bdw__render_pipe_profile__hs_stall__read

/* DS Stall */
#define kblgt2__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__ds_stall__read

/* SO Stall */
#define kblgt2__render_pipe_profile__so_stall__read bdw__render_pipe_profile__so_stall__read

/* CL Stall */
#define kblgt2__render_pipe_profile__cl_stall__read bdw__render_pipe_profile__cl_stall__read

/* SF Stall */
#define kblgt2__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_stall__read

/*
 * "Memory Reads Distribution metrics set": kblgt2__memory_reads__* aliases.
 * Each macro simply expands to the identifier on its right-hand side.
 */

/* GPU Time Elapsed */
#define kblgt2__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt2__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define kblgt2__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt2__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt2__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt2__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt2__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt2__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt2__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt2__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt2__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt2__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt2__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define kblgt2__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt2__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt2__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt2__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt2__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt2__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt2__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt2__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt2__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt2__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt2__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt2__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt2__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define kblgt2__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryReads */
#define kblgt2__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define kblgt2__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define kblgt2__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define kblgt2__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define kblgt2__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define kblgt2__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define kblgt2__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define kblgt2__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define kblgt2__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define kblgt2__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define kblgt2__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define kblgt2__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define kblgt2__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define kblgt2__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define kblgt2__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * kblgt2 "Memory Writes Distribution" reader aliases.
 *
 * Each macro below makes a kblgt2-prefixed reader name expand to a reader
 * name carrying an hsw or bdw prefix; no function body is emitted for the
 * kblgt2 name in this run of definitions.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. gs_threads -> hsw__render_basic__vs_threads, gti_l3_writes ->
 * bdw__memory_reads__gti_l3_reads). Presumably the generator merges readers
 * whose equations are identical, so the target name only reflects which
 * reader was emitted first -- confirm in gen_perf.py before treating any of
 * these as a mismatch.
 */

/* Memory Writes Distribution metrics set :: GPU Time Elapsed */
#define kblgt2__memory_writes__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Memory Writes Distribution metrics set :: GPU Core Clocks */
#define kblgt2__memory_writes__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency (__read alias) */
#define kblgt2__memory_writes__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Memory Writes Distribution metrics set :: AVG GPU Core Frequency (__max alias) */
#define kblgt2__memory_writes__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Memory Writes Distribution metrics set :: GPU Busy */
#define kblgt2__memory_writes__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Memory Writes Distribution metrics set :: VS Threads Dispatched */
#define kblgt2__memory_writes__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: HS Threads Dispatched */
#define kblgt2__memory_writes__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Memory Writes Distribution metrics set :: DS Threads Dispatched */
#define kblgt2__memory_writes__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Memory Writes Distribution metrics set :: GS Threads Dispatched */
#define kblgt2__memory_writes__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Memory Writes Distribution metrics set :: FS Threads Dispatched */
#define kblgt2__memory_writes__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Memory Writes Distribution metrics set :: CS Threads Dispatched */
#define kblgt2__memory_writes__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Memory Writes Distribution metrics set :: EU Active */
#define kblgt2__memory_writes__eu_active__read \
   bdw__render_basic__eu_active__read

/* Memory Writes Distribution metrics set :: EU Stall */
#define kblgt2__memory_writes__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Memory Writes Distribution metrics set :: Rasterized Pixels */
#define kblgt2__memory_writes__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Memory Writes Distribution metrics set :: Early Hi-Depth Test Fails */
#define kblgt2__memory_writes__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Early Depth Test Fails */
#define kblgt2__memory_writes__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: Samples Killed in FS */
#define kblgt2__memory_writes__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Memory Writes Distribution metrics set :: Pixels Failing Tests */
#define kblgt2__memory_writes__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Memory Writes Distribution metrics set :: Samples Written */
#define kblgt2__memory_writes__samples_written__read \
   bdw__render_basic__samples_written__read

/* Memory Writes Distribution metrics set :: Samples Blended */
#define kblgt2__memory_writes__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Memory Writes Distribution metrics set :: Sampler Texels */
#define kblgt2__memory_writes__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Memory Writes Distribution metrics set :: Sampler Texels Misses */
#define kblgt2__memory_writes__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Memory Writes Distribution metrics set :: SLM Bytes Read */
#define kblgt2__memory_writes__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Memory Writes Distribution metrics set :: SLM Bytes Written */
#define kblgt2__memory_writes__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Memory Writes Distribution metrics set :: Shader Memory Accesses */
#define kblgt2__memory_writes__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Memory Writes Distribution metrics set :: Shader Atomic Memory Accesses */
#define kblgt2__memory_writes__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Memory Writes Distribution metrics set :: L3 Shader Throughput */
#define kblgt2__memory_writes__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Memory Writes Distribution metrics set :: Shader Barrier Messages */
#define kblgt2__memory_writes__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: GtiCmdStreamerMemoryWrites */
#define kblgt2__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metrics set :: GtiSoMemoryWrites */
#define kblgt2__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metrics set :: GtiRccMemoryWrites */
#define kblgt2__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiMscMemoryWrites */
#define kblgt2__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metrics set :: GtiHizMemoryWrites */
#define kblgt2__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiStcMemoryWrites */
#define kblgt2__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define kblgt2__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define kblgt2__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define kblgt2__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define kblgt2__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define kblgt2__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define kblgt2__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define kblgt2__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define kblgt2__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * kblgt2 "Compute Metrics Extended" reader aliases.
 *
 * Each macro below makes a kblgt2-prefixed reader name expand to a reader
 * name carrying an hsw or bdw prefix; no function body is emitted for the
 * kblgt2 name in this run of definitions.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. typed_reads0 -> hsw__render_basic__gpu_core_clocks,
 * eu_a64_untyped_writes0 -> hsw__compute_extended__gpu_clocks). Presumably
 * the generator merges readers whose equations are identical, so the target
 * name only reflects which reader was emitted first -- confirm in
 * gen_perf.py before treating any of these as a mismatch.
 */

/* Compute Metrics Extended set :: GPU Time Elapsed */
#define kblgt2__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks */
#define kblgt2__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (__read alias) */
#define kblgt2__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (__max alias) */
#define kblgt2__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended set :: CS Threads Dispatched */
#define kblgt2__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EU Active */
#define kblgt2__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended set :: EU Stall */
#define kblgt2__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended set :: EU Both FPU Pipes Active */
#define kblgt2__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended set :: EU FPU0 Pipe Active */
#define kblgt2__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended set :: EU FPU1 Pipe Active */
#define kblgt2__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended set :: EU AVG IPC Rate */
#define kblgt2__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended set :: EU Send Pipe Active */
#define kblgt2__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended set :: EU Thread Occupancy */
#define kblgt2__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended set :: Sampler Texels */
#define kblgt2__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended set :: Sampler Texels Misses */
#define kblgt2__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended set :: SLM Bytes Read */
#define kblgt2__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended set :: SLM Bytes Written */
#define kblgt2__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended set :: Shader Memory Accesses */
#define kblgt2__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended set :: Shader Atomic Memory Accesses */
#define kblgt2__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended set :: L3 Shader Throughput */
#define kblgt2__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended set :: Shader Barrier Messages */
#define kblgt2__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended set :: EuUntypedReads0 */
#define kblgt2__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended set :: EuTypedReads0 */
#define kblgt2__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended set :: EuUntypedWrites0 */
#define kblgt2__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended set :: EuTypedWrites0 */
#define kblgt2__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended set :: EuUntypedAtomics0 */
#define kblgt2__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended set :: EuTypedAtomics0 */
#define kblgt2__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedReads0 */
#define kblgt2__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedWrites0 */
#define kblgt2__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended set :: Typed Reads 0 */
#define kblgt2__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0 */
#define kblgt2__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended set :: Untyped Reads 0 */
#define kblgt2__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended set :: Untyped Writes 0 */
#define kblgt2__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended set :: Typed Atomics 0 */
#define kblgt2__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended set :: TypedReadsPerCacheLine */
#define kblgt2__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended set :: TypedWritesPerCacheLine */
#define kblgt2__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine */
#define kblgt2__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine */
#define kblgt2__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine */
#define kblgt2__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * kblgt2 "Compute Metrics L3 Cache" reader aliases.
 *
 * Each macro below makes a kblgt2-prefixed reader name expand to a reader
 * name carrying an hsw, bdw, chv or sklgt2 prefix; no function body is
 * emitted for the kblgt2 name in this run of definitions.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. eu_ternary_fpu0_instruction -> bdw__render_basic__ps_fpu0_active,
 * eu_binary_fpu1_instruction -> bdw__render_basic__ps_eu_both_fpu_active).
 * Presumably the generator merges readers whose equations are identical, so
 * the target name only reflects which reader was emitted first -- confirm in
 * gen_perf.py before treating any of these as a mismatch.
 */

/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define kblgt2__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define kblgt2__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__read alias) */
#define kblgt2__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__max alias) */
#define kblgt2__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define kblgt2__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define kblgt2__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define kblgt2__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define kblgt2__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define kblgt2__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define kblgt2__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define kblgt2__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define kblgt2__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define kblgt2__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define kblgt2__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define kblgt2__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define kblgt2__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define kblgt2__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define kblgt2__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define kblgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define kblgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define kblgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define kblgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define kblgt2__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define kblgt2__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define kblgt2__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define kblgt2__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define kblgt2__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define kblgt2__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define kblgt2__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define kblgt2__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define kblgt2__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define kblgt2__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define kblgt2__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define kblgt2__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define kblgt2__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define kblgt2__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define kblgt2__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define kblgt2__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define kblgt2__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define kblgt2__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define kblgt2__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define kblgt2__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define kblgt2__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define kblgt2__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define kblgt2__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define kblgt2__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define kblgt2__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define kblgt2__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define kblgt2__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define kblgt2__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define kblgt2__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define kblgt2__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define kblgt2__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define kblgt2__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define kblgt2__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * kblgt2 "HDCAndSF" reader aliases.
 *
 * Each macro below makes a kblgt2-prefixed reader name expand to a reader
 * name carrying an hsw or bdw prefix; no function body is emitted for the
 * kblgt2 name in this run of definitions.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. poly_data_ready -> bdw__render_basic__sampler0_busy).
 * Presumably the generator merges readers whose equations are identical, so
 * the target name only reflects which reader was emitted first -- confirm in
 * gen_perf.py before treating any of these as a mismatch.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define kblgt2__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define kblgt2__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__read alias) */
#define kblgt2__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max alias) */
#define kblgt2__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define kblgt2__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define kblgt2__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define kblgt2__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define kblgt2__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define kblgt2__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define kblgt2__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define kblgt2__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define kblgt2__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define kblgt2__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define kblgt2__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define kblgt2__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define kblgt2__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define kblgt2__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define kblgt2__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define kblgt2__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define kblgt2__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define kblgt2__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define kblgt2__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define kblgt2__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define kblgt2__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define kblgt2__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define kblgt2__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define kblgt2__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define kblgt2__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define kblgt2__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define kblgt2__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define kblgt2__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define kblgt2__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define kblgt2__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define kblgt2__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define kblgt2__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define kblgt2__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define kblgt2__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/*
 * Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0)
 *
 * NOTE(review): the shader00 alias targets the bdw shader02 reader and the
 * shader02 alias further down targets the bdw shader00 reader, while
 * shader01 maps straight across. This looks crossed; presumably it comes
 * from the generator's input rather than a typo -- confirm there.
 */
#define kblgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define kblgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) -- targets the bdw shader00 reader */
#define kblgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define kblgt2__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "L3_1" reader aliases.
 *
 * Each macro below makes a kblgt2-prefixed reader name expand to a reader
 * name carrying an hsw or bdw prefix; no function body is emitted for the
 * kblgt2 name in this run of definitions.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. l30_bank0_stalled -> bdw__render_basic__sampler0_busy,
 * l30_bank1_active -> bdw__render_pipe_profile__bc_bottleneck). Presumably
 * the generator merges readers whose equations are identical, so the target
 * name only reflects which reader was emitted first -- confirm in
 * gen_perf.py before treating any of these as a mismatch.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define kblgt2__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define kblgt2__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__read alias) */
#define kblgt2__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max alias) */
#define kblgt2__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define kblgt2__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define kblgt2__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define kblgt2__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define kblgt2__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define kblgt2__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define kblgt2__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define kblgt2__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define kblgt2__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define kblgt2__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define kblgt2__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define kblgt2__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define kblgt2__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define kblgt2__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define kblgt2__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define kblgt2__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define kblgt2__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define kblgt2__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define kblgt2__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define kblgt2__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define kblgt2__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define kblgt2__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define kblgt2__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define kblgt2__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define kblgt2__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define kblgt2__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define kblgt2__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define kblgt2__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define kblgt2__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define kblgt2__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define kblgt2__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define kblgt2__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define kblgt2__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define kblgt2__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define kblgt2__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define kblgt2__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define kblgt2__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define kblgt2__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "L3_2" metric set: counter accessor aliases.
 *
 * Every kblgt2__l3_2__* name below is an object-like macro that expands to
 * an already-existing hsw__ or bdw__ identifier; nothing new is defined here.
 * NOTE(review): a few targets are named after unrelated counters (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank2_* -> sampler/bottleneck readers);
 * presumably the generator reuses any accessor whose equation is identical
 * -- confirm in gen_perf.py before changing a target.
 */

/* [L3_2] GPU Time Elapsed */
#define kblgt2__l3_2__gpu_time__read hsw__render_basic__gpu_time__read

/* [L3_2] GPU Core Clocks */
#define kblgt2__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [L3_2] AVG GPU Core Frequency */
#define kblgt2__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [L3_2] AVG GPU Core Frequency (the __max companion of the reader above) */
#define kblgt2__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [L3_2] GPU Busy */
#define kblgt2__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [L3_2] VS Threads Dispatched */
#define kblgt2__l3_2__vs_threads__read bdw__render_basic__vs_threads__read

/* [L3_2] HS Threads Dispatched */
#define kblgt2__l3_2__hs_threads__read bdw__render_basic__hs_threads__read

/* [L3_2] DS Threads Dispatched */
#define kblgt2__l3_2__ds_threads__read bdw__render_basic__ds_threads__read

/* [L3_2] GS Threads Dispatched */
#define kblgt2__l3_2__gs_threads__read hsw__render_basic__vs_threads__read

/* [L3_2] FS Threads Dispatched */
#define kblgt2__l3_2__ps_threads__read bdw__render_basic__ps_threads__read

/* [L3_2] CS Threads Dispatched */
#define kblgt2__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* [L3_2] EU Active */
#define kblgt2__l3_2__eu_active__read bdw__render_basic__eu_active__read

/* [L3_2] EU Stall */
#define kblgt2__l3_2__eu_stall__read bdw__render_basic__eu_stall__read

/* [L3_2] EU Both FPU Pipes Active */
#define kblgt2__l3_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [L3_2] VS FPU0 Pipe Active */
#define kblgt2__l3_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [L3_2] VS FPU1 Pipe Active */
#define kblgt2__l3_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [L3_2] VS Send Pipe Active */
#define kblgt2__l3_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [L3_2] PS FPU0 Pipe Active */
#define kblgt2__l3_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [L3_2] PS FPU1 Pipe Active */
#define kblgt2__l3_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [L3_2] PS Send Pipeline Active */
#define kblgt2__l3_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [L3_2] FS Both FPU Active */
#define kblgt2__l3_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [L3_2] Rasterized Pixels */
#define kblgt2__l3_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [L3_2] Early Hi-Depth Test Fails */
#define kblgt2__l3_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [L3_2] Early Depth Test Fails */
#define kblgt2__l3_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [L3_2] Samples Killed in FS */
#define kblgt2__l3_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [L3_2] Pixels Failing Tests */
#define kblgt2__l3_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [L3_2] Samples Written */
#define kblgt2__l3_2__samples_written__read bdw__render_basic__samples_written__read

/* [L3_2] Samples Blended */
#define kblgt2__l3_2__samples_blended__read bdw__render_basic__samples_blended__read

/* [L3_2] Sampler Texels */
#define kblgt2__l3_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [L3_2] Sampler Texels Misses */
#define kblgt2__l3_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [L3_2] SLM Bytes Read */
#define kblgt2__l3_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [L3_2] SLM Bytes Written */
#define kblgt2__l3_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [L3_2] Shader Memory Accesses */
#define kblgt2__l3_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [L3_2] Shader Atomic Memory Accesses */
#define kblgt2__l3_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [L3_2] L3 Shader Throughput */
#define kblgt2__l3_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [L3_2] Shader Barrier Messages */
#define kblgt2__l3_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [L3_2] Slice0 L3 Bank2 Stalled */
#define kblgt2__l3_2__l30_bank2_stalled__read bdw__render_basic__sampler0_busy__read

/* [L3_2] Slice0 L3 Bank2 Active */
#define kblgt2__l3_2__l30_bank2_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* [L3_2] SQ is full */
#define kblgt2__l3_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "L3_3" metric set: counter accessor aliases.
 *
 * Every kblgt2__l3_3__* name below is an object-like macro that expands to
 * an already-existing hsw__ or bdw__ identifier; nothing new is defined here.
 * NOTE(review): a few targets are named after unrelated counters (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank3_* -> sampler/bottleneck readers);
 * presumably the generator reuses any accessor whose equation is identical
 * -- confirm in gen_perf.py before changing a target.
 */

/* [L3_3] GPU Time Elapsed */
#define kblgt2__l3_3__gpu_time__read hsw__render_basic__gpu_time__read

/* [L3_3] GPU Core Clocks */
#define kblgt2__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [L3_3] AVG GPU Core Frequency */
#define kblgt2__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [L3_3] AVG GPU Core Frequency (the __max companion of the reader above) */
#define kblgt2__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [L3_3] GPU Busy */
#define kblgt2__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [L3_3] VS Threads Dispatched */
#define kblgt2__l3_3__vs_threads__read bdw__render_basic__vs_threads__read

/* [L3_3] HS Threads Dispatched */
#define kblgt2__l3_3__hs_threads__read bdw__render_basic__hs_threads__read

/* [L3_3] DS Threads Dispatched */
#define kblgt2__l3_3__ds_threads__read bdw__render_basic__ds_threads__read

/* [L3_3] GS Threads Dispatched */
#define kblgt2__l3_3__gs_threads__read hsw__render_basic__vs_threads__read

/* [L3_3] FS Threads Dispatched */
#define kblgt2__l3_3__ps_threads__read bdw__render_basic__ps_threads__read

/* [L3_3] CS Threads Dispatched */
#define kblgt2__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* [L3_3] EU Active */
#define kblgt2__l3_3__eu_active__read bdw__render_basic__eu_active__read

/* [L3_3] EU Stall */
#define kblgt2__l3_3__eu_stall__read bdw__render_basic__eu_stall__read

/* [L3_3] EU Both FPU Pipes Active */
#define kblgt2__l3_3__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [L3_3] VS FPU0 Pipe Active */
#define kblgt2__l3_3__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [L3_3] VS FPU1 Pipe Active */
#define kblgt2__l3_3__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [L3_3] VS Send Pipe Active */
#define kblgt2__l3_3__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [L3_3] PS FPU0 Pipe Active */
#define kblgt2__l3_3__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [L3_3] PS FPU1 Pipe Active */
#define kblgt2__l3_3__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [L3_3] PS Send Pipeline Active */
#define kblgt2__l3_3__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [L3_3] FS Both FPU Active */
#define kblgt2__l3_3__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [L3_3] Rasterized Pixels */
#define kblgt2__l3_3__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [L3_3] Early Hi-Depth Test Fails */
#define kblgt2__l3_3__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [L3_3] Early Depth Test Fails */
#define kblgt2__l3_3__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [L3_3] Samples Killed in FS */
#define kblgt2__l3_3__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [L3_3] Pixels Failing Tests */
#define kblgt2__l3_3__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [L3_3] Samples Written */
#define kblgt2__l3_3__samples_written__read bdw__render_basic__samples_written__read

/* [L3_3] Samples Blended */
#define kblgt2__l3_3__samples_blended__read bdw__render_basic__samples_blended__read

/* [L3_3] Sampler Texels */
#define kblgt2__l3_3__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [L3_3] Sampler Texels Misses */
#define kblgt2__l3_3__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [L3_3] SLM Bytes Read */
#define kblgt2__l3_3__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [L3_3] SLM Bytes Written */
#define kblgt2__l3_3__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [L3_3] Shader Memory Accesses */
#define kblgt2__l3_3__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [L3_3] Shader Atomic Memory Accesses */
#define kblgt2__l3_3__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [L3_3] L3 Shader Throughput */
#define kblgt2__l3_3__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [L3_3] Shader Barrier Messages */
#define kblgt2__l3_3__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [L3_3] Slice0 L3 Bank3 Stalled */
#define kblgt2__l3_3__l30_bank3_stalled__read bdw__render_basic__sampler0_busy__read

/* [L3_3] Slice0 L3 Bank3 Active */
#define kblgt2__l3_3__l30_bank3_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* [L3_3] SQ is full */
#define kblgt2__l3_3__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "RasterizerAndPixelBackend" metric set: counter accessor aliases.
 *
 * Every kblgt2__rasterizer_and_pixel_backend__* name below is an object-like
 * macro that expands to an already-existing hsw__ or bdw__ identifier;
 * nothing new is defined here.
 * NOTE(review): a few targets are named after unrelated counters (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, the Slice0 rasterizer/pixel counters ->
 * sampler*_busy and *_bottleneck/sf_stall readers); presumably the generator
 * reuses any accessor whose equation is identical -- confirm in gen_perf.py
 * before changing a target.
 */

/* [RasterizerAndPixelBackend] GPU Time Elapsed */
#define kblgt2__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* [RasterizerAndPixelBackend] GPU Core Clocks */
#define kblgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [RasterizerAndPixelBackend] AVG GPU Core Frequency */
#define kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [RasterizerAndPixelBackend] AVG GPU Core Frequency (the __max companion of the reader above) */
#define kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [RasterizerAndPixelBackend] GPU Busy */
#define kblgt2__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [RasterizerAndPixelBackend] VS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* [RasterizerAndPixelBackend] HS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* [RasterizerAndPixelBackend] DS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* [RasterizerAndPixelBackend] GS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* [RasterizerAndPixelBackend] FS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* [RasterizerAndPixelBackend] CS Threads Dispatched */
#define kblgt2__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* [RasterizerAndPixelBackend] EU Active */
#define kblgt2__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* [RasterizerAndPixelBackend] EU Stall */
#define kblgt2__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* [RasterizerAndPixelBackend] EU Both FPU Pipes Active */
#define kblgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [RasterizerAndPixelBackend] VS FPU0 Pipe Active */
#define kblgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [RasterizerAndPixelBackend] VS FPU1 Pipe Active */
#define kblgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [RasterizerAndPixelBackend] VS Send Pipe Active */
#define kblgt2__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [RasterizerAndPixelBackend] PS FPU0 Pipe Active */
#define kblgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [RasterizerAndPixelBackend] PS FPU1 Pipe Active */
#define kblgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [RasterizerAndPixelBackend] PS Send Pipeline Active */
#define kblgt2__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [RasterizerAndPixelBackend] FS Both FPU Active */
#define kblgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [RasterizerAndPixelBackend] Rasterized Pixels */
#define kblgt2__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [RasterizerAndPixelBackend] Early Hi-Depth Test Fails */
#define kblgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [RasterizerAndPixelBackend] Early Depth Test Fails */
#define kblgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [RasterizerAndPixelBackend] Samples Killed in FS */
#define kblgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [RasterizerAndPixelBackend] Pixels Failing Tests */
#define kblgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [RasterizerAndPixelBackend] Samples Written */
#define kblgt2__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* [RasterizerAndPixelBackend] Samples Blended */
#define kblgt2__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* [RasterizerAndPixelBackend] Sampler Texels */
#define kblgt2__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [RasterizerAndPixelBackend] Sampler Texels Misses */
#define kblgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [RasterizerAndPixelBackend] SLM Bytes Read */
#define kblgt2__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [RasterizerAndPixelBackend] SLM Bytes Written */
#define kblgt2__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [RasterizerAndPixelBackend] Shader Memory Accesses */
#define kblgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [RasterizerAndPixelBackend] Shader Atomic Memory Accesses */
#define kblgt2__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [RasterizerAndPixelBackend] L3 Shader Throughput */
#define kblgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [RasterizerAndPixelBackend] Shader Barrier Messages */
#define kblgt2__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [RasterizerAndPixelBackend] Slice0 Post-EarlyZ Pixel Data Ready */
#define kblgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_busy__read

/* [RasterizerAndPixelBackend] Slice0 Rasterizer Input Available */
#define kblgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_basic__sampler1_busy__read

/* [RasterizerAndPixelBackend] Slice0 PS Output Available */
#define kblgt2__rasterizer_and_pixel_backend__ps_output0_available__read bdw__render_pipe_profile__bc_bottleneck__read

/* [RasterizerAndPixelBackend] Slice0 Pixel Values Ready */
#define kblgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* [RasterizerAndPixelBackend] Slice0 Rasterizer Output Ready */
#define kblgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* [RasterizerAndPixelBackend] SQ is full */
#define kblgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "Sampler" metric set: counter accessor aliases.
 *
 * Every kblgt2__sampler__* name below is an object-like macro that expands
 * to an already-existing hsw__ or bdw__ identifier; nothing new is defined
 * here.
 * NOTE(review): a few targets are named after unrelated counters (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, sampler0N_* -> sampler*_busy and
 * *_bottleneck/sf_stall readers); presumably the generator reuses any
 * accessor whose equation is identical -- confirm in gen_perf.py before
 * changing a target.
 */

/* [Sampler] GPU Time Elapsed */
#define kblgt2__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* [Sampler] GPU Core Clocks */
#define kblgt2__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Sampler] AVG GPU Core Frequency */
#define kblgt2__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Sampler] AVG GPU Core Frequency (the __max companion of the reader above) */
#define kblgt2__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Sampler] GPU Busy */
#define kblgt2__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Sampler] VS Threads Dispatched */
#define kblgt2__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* [Sampler] HS Threads Dispatched */
#define kblgt2__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* [Sampler] DS Threads Dispatched */
#define kblgt2__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* [Sampler] GS Threads Dispatched */
#define kblgt2__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* [Sampler] FS Threads Dispatched */
#define kblgt2__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* [Sampler] CS Threads Dispatched */
#define kblgt2__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* [Sampler] EU Active */
#define kblgt2__sampler__eu_active__read bdw__render_basic__eu_active__read

/* [Sampler] EU Stall */
#define kblgt2__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* [Sampler] EU Both FPU Pipes Active */
#define kblgt2__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [Sampler] VS FPU0 Pipe Active */
#define kblgt2__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [Sampler] VS FPU1 Pipe Active */
#define kblgt2__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [Sampler] VS Send Pipe Active */
#define kblgt2__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [Sampler] PS FPU0 Pipe Active */
#define kblgt2__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [Sampler] PS FPU1 Pipe Active */
#define kblgt2__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [Sampler] PS Send Pipeline Active */
#define kblgt2__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [Sampler] FS Both FPU Active */
#define kblgt2__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [Sampler] Rasterized Pixels */
#define kblgt2__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [Sampler] Early Hi-Depth Test Fails */
#define kblgt2__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [Sampler] Early Depth Test Fails */
#define kblgt2__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [Sampler] Samples Killed in FS */
#define kblgt2__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [Sampler] Pixels Failing Tests */
#define kblgt2__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [Sampler] Samples Written */
#define kblgt2__sampler__samples_written__read bdw__render_basic__samples_written__read

/* [Sampler] Samples Blended */
#define kblgt2__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* [Sampler] Sampler Texels */
#define kblgt2__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [Sampler] Sampler Texels Misses */
#define kblgt2__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [Sampler] SLM Bytes Read */
#define kblgt2__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [Sampler] SLM Bytes Written */
#define kblgt2__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [Sampler] Shader Memory Accesses */
#define kblgt2__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [Sampler] Shader Atomic Memory Accesses */
#define kblgt2__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [Sampler] L3 Shader Throughput */
#define kblgt2__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [Sampler] Shader Barrier Messages */
#define kblgt2__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [Sampler] Slice0 Subslice1 Input Available */
#define kblgt2__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* [Sampler] Slice0 Subslice2 Input Available */
#define kblgt2__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* [Sampler] Slice0 Subslice0 Input Available */
#define kblgt2__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* [Sampler] Slice0 Subslice2 Sampler Output Ready */
#define kblgt2__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* [Sampler] Slice0 Subslice0 Sampler Output Ready */
#define kblgt2__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* [Sampler] Slice0 Subslice1 Sampler Output Ready */
#define kblgt2__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* [Sampler] SQ is full */
#define kblgt2__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt2 "TDL_1" metric set: counter accessor aliases.
 *
 * Every kblgt2__tdl_1__* name below is an object-like macro that expands to
 * an already-existing hsw__ or bdw__ identifier; nothing new is defined here.
 * NOTE(review): a few targets are named after unrelated counters (e.g.
 * gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, *_ready_for_dispatch -> sampler*_busy and
 * *_bottleneck readers); presumably the generator reuses any accessor whose
 * equation is identical -- confirm in gen_perf.py before changing a target.
 */

/* [TDL_1] GPU Time Elapsed */
#define kblgt2__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* [TDL_1] GPU Core Clocks */
#define kblgt2__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [TDL_1] AVG GPU Core Frequency */
#define kblgt2__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [TDL_1] AVG GPU Core Frequency (the __max companion of the reader above) */
#define kblgt2__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [TDL_1] GPU Busy */
#define kblgt2__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [TDL_1] VS Threads Dispatched */
#define kblgt2__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* [TDL_1] HS Threads Dispatched */
#define kblgt2__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* [TDL_1] DS Threads Dispatched */
#define kblgt2__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* [TDL_1] GS Threads Dispatched */
#define kblgt2__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* [TDL_1] FS Threads Dispatched */
#define kblgt2__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* [TDL_1] CS Threads Dispatched */
#define kblgt2__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* [TDL_1] EU Active */
#define kblgt2__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* [TDL_1] EU Stall */
#define kblgt2__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* [TDL_1] EU Both FPU Pipes Active */
#define kblgt2__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [TDL_1] VS FPU0 Pipe Active */
#define kblgt2__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [TDL_1] VS FPU1 Pipe Active */
#define kblgt2__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [TDL_1] VS Send Pipe Active */
#define kblgt2__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [TDL_1] PS FPU0 Pipe Active */
#define kblgt2__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [TDL_1] PS FPU1 Pipe Active */
#define kblgt2__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [TDL_1] PS Send Pipeline Active */
#define kblgt2__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [TDL_1] FS Both FPU Active */
#define kblgt2__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [TDL_1] Rasterized Pixels */
#define kblgt2__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [TDL_1] Early Hi-Depth Test Fails */
#define kblgt2__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [TDL_1] Early Depth Test Fails */
#define kblgt2__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [TDL_1] Samples Killed in FS */
#define kblgt2__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [TDL_1] Pixels Failing Tests */
#define kblgt2__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [TDL_1] Samples Written */
#define kblgt2__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* [TDL_1] Samples Blended */
#define kblgt2__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* [TDL_1] Sampler Texels */
#define kblgt2__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [TDL_1] Sampler Texels Misses */
#define kblgt2__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [TDL_1] SLM Bytes Read */
#define kblgt2__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [TDL_1] SLM Bytes Written */
#define kblgt2__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [TDL_1] Shader Memory Accesses */
#define kblgt2__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [TDL_1] Shader Atomic Memory Accesses */
#define kblgt2__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [TDL_1] L3 Shader Throughput */
#define kblgt2__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [TDL_1] Shader Barrier Messages */
#define kblgt2__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [TDL_1] NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define kblgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* [TDL_1] PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define kblgt2__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* [TDL_1] NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define kblgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* [TDL_1] PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define kblgt2__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* [TDL_1] NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define kblgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* [TDL_1] PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define kblgt2__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* [TDL_1] SQ is full */
#define kblgt2__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt2 "Metric set TDL_2" reader aliases.
 *
 * Every macro is a plain object-like alias: the kblgt2 name expands to the
 * reader symbol written after it (the targets are not defined here).  Some
 * targets are named after a different counter, e.g. gs_threads expands to
 * hsw__render_basic__vs_threads__read; the expansions are reproduced
 * unchanged.
 */

/* [Metric set TDL_2] GPU Time Elapsed */
#define kblgt2__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* [Metric set TDL_2] GPU Core Clocks */
#define kblgt2__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Metric set TDL_2] AVG GPU Core Frequency */
#define kblgt2__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Metric set TDL_2] AVG GPU Core Frequency (__max variant) */
#define kblgt2__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Metric set TDL_2] GPU Busy */
#define kblgt2__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Metric set TDL_2] VS Threads Dispatched */
#define kblgt2__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* [Metric set TDL_2] HS Threads Dispatched */
#define kblgt2__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* [Metric set TDL_2] DS Threads Dispatched */
#define kblgt2__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* [Metric set TDL_2] GS Threads Dispatched */
#define kblgt2__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* [Metric set TDL_2] FS Threads Dispatched */
#define kblgt2__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* [Metric set TDL_2] CS Threads Dispatched */
#define kblgt2__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* [Metric set TDL_2] EU Active */
#define kblgt2__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* [Metric set TDL_2] EU Stall */
#define kblgt2__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* [Metric set TDL_2] EU Both FPU Pipes Active */
#define kblgt2__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [Metric set TDL_2] VS FPU0 Pipe Active */
#define kblgt2__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [Metric set TDL_2] VS FPU1 Pipe Active */
#define kblgt2__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [Metric set TDL_2] VS Send Pipe Active */
#define kblgt2__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [Metric set TDL_2] PS FPU0 Pipe Active */
#define kblgt2__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [Metric set TDL_2] PS FPU1 Pipe Active */
#define kblgt2__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [Metric set TDL_2] PS Send Pipeline Active */
#define kblgt2__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [Metric set TDL_2] FS Both FPU Active */
#define kblgt2__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [Metric set TDL_2] Rasterized Pixels */
#define kblgt2__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [Metric set TDL_2] Early Hi-Depth Test Fails */
#define kblgt2__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [Metric set TDL_2] Early Depth Test Fails */
#define kblgt2__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [Metric set TDL_2] Samples Killed in FS */
#define kblgt2__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [Metric set TDL_2] Pixels Failing Tests */
#define kblgt2__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [Metric set TDL_2] Samples Written */
#define kblgt2__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* [Metric set TDL_2] Samples Blended */
#define kblgt2__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* [Metric set TDL_2] Sampler Texels */
#define kblgt2__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [Metric set TDL_2] Sampler Texels Misses */
#define kblgt2__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [Metric set TDL_2] SLM Bytes Read */
#define kblgt2__tdl_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [Metric set TDL_2] SLM Bytes Written */
#define kblgt2__tdl_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [Metric set TDL_2] Shader Memory Accesses */
#define kblgt2__tdl_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [Metric set TDL_2] Shader Atomic Memory Accesses */
#define kblgt2__tdl_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [Metric set TDL_2] L3 Shader Throughput */
#define kblgt2__tdl_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [Metric set TDL_2] Shader Barrier Messages */
#define kblgt2__tdl_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice1 Port 0 */
#define kblgt2__tdl_2__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice0 Port 1 */
#define kblgt2__tdl_2__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice0 Port 0 */
#define kblgt2__tdl_2__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice2 Port 1 */
#define kblgt2__tdl_2__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice2 Port 0 */
#define kblgt2__tdl_2__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* [Metric set TDL_2] Thread Header Ready on Slice0 Subslice1 Port 1 */
#define kblgt2__tdl_2__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* [Metric set TDL_2] SQ is full */
#define kblgt2__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt2 "Compute Metrics Extra set" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  Note that
 * fpu1_active expands to bdw__render_basic__eu_stall__read; the expansion
 * is reproduced unchanged.
 */

/* [Compute Metrics Extra set] GPU Time Elapsed */
#define kblgt2__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* [Compute Metrics Extra set] GPU Core Clocks */
#define kblgt2__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Compute Metrics Extra set] AVG GPU Core Frequency */
#define kblgt2__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Compute Metrics Extra set] AVG GPU Core Frequency (__max variant) */
#define kblgt2__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Compute Metrics Extra set] EU FPU1 Pipe Active */
#define kblgt2__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* [Compute Metrics Extra set] EU FPU1 Pipe Active including Ext Math */
#define kblgt2__compute_extra__fpu1_active_adjusted__read sklgt2__compute_extra__fpu1_active_adjusted__read

/* kblgt2 "Media Vme Pipe metrics set" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).
 */

/* [Media Vme Pipe metrics set] GPU Time Elapsed */
#define kblgt2__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read

/* [Media Vme Pipe metrics set] GPU Core Clocks */
#define kblgt2__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Media Vme Pipe metrics set] AVG GPU Core Frequency */
#define kblgt2__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Media Vme Pipe metrics set] AVG GPU Core Frequency (__max variant) */
#define kblgt2__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Media Vme Pipe metrics set] GPU Busy */
#define kblgt2__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Media Vme Pipe metrics set] CS Threads Dispatched */
#define kblgt2__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read

/* [Media Vme Pipe metrics set] EU Active */
#define kblgt2__vme_pipe__eu_active__read bdw__render_basic__eu_active__read

/* [Media Vme Pipe metrics set] EU Stall */
#define kblgt2__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read

/* [Media Vme Pipe metrics set] EU Both FPU Pipes Active */
#define kblgt2__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [Media Vme Pipe metrics set] EU Thread Occupancy */
#define kblgt2__vme_pipe__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* [Media Vme Pipe metrics set] VME Busy */
#define kblgt2__vme_pipe__vme_busy__read bdw__vme_pipe__vme_busy__read

/* kblgt2 "Gpu Rings Busyness" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  The ring-busy
 * aliases expand to readers named after render_pipe_profile / render_basic
 * counters (e.g. render_busy -> bdw__render_pipe_profile__bc_bottleneck__read);
 * the expansions are reproduced unchanged.
 */

/* [Gpu Rings Busyness] GPU Time Elapsed */
#define kblgt2__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* [Gpu Rings Busyness] GPU Core Clocks */
#define kblgt2__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Gpu Rings Busyness] AVG GPU Core Frequency */
#define kblgt2__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Gpu Rings Busyness] AVG GPU Core Frequency (__max variant) */
#define kblgt2__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Gpu Rings Busyness] Render Ring Busy */
#define kblgt2__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* [Gpu Rings Busyness] Vdbox0 Ring Busy */
#define kblgt2__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* [Gpu Rings Busyness] Vebox Ring Busy */
#define kblgt2__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__sf_stall__read

/* [Gpu Rings Busyness] Blitter Ring Busy */
#define kblgt2__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read

/* [Gpu Rings Busyness] AnyRingBusy */
#define kblgt2__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/* kblgt2 "MDAPI testing set" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  TestCounter0..8
 * expand to readers carrying hsw__compute_extended__* / hsw__memory_reads__*
 * names; the expansions are reproduced unchanged.
 */

/* [MDAPI testing set] GPU Time Elapsed */
#define kblgt2__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* [MDAPI testing set] GPU Core Clocks */
#define kblgt2__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [MDAPI testing set] AVG GPU Core Frequency */
#define kblgt2__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [MDAPI testing set] AVG GPU Core Frequency (__max variant) */
#define kblgt2__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [MDAPI testing set] TestCounter0 */
#define kblgt2__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* [MDAPI testing set] TestCounter1 */
#define kblgt2__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* [MDAPI testing set] TestCounter2 */
#define kblgt2__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* [MDAPI testing set] TestCounter3 */
#define kblgt2__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* [MDAPI testing set] TestCounter4 */
#define kblgt2__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* [MDAPI testing set] TestCounter5 */
#define kblgt2__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* [MDAPI testing set] TestCounter6 */
#define kblgt2__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* [MDAPI testing set] TestCounter7 */
#define kblgt2__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* [MDAPI testing set] TestCounter8 */
#define kblgt2__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/* kblgt2 "Metric set PMA Stall" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).
 */

/* [Metric set PMA Stall] GPU Time Elapsed */
#define kblgt2__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* [Metric set PMA Stall] GPU Core Clocks */
#define kblgt2__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Metric set PMA Stall] AVG GPU Core Frequency */
#define kblgt2__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Metric set PMA Stall] AVG GPU Core Frequency (__max variant) */
#define kblgt2__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Metric set PMA Stall] STC PMA stall */
#define kblgt2__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/* kblgt2 "AsyncCompute" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  Several FPU aliases
 * expand to readers named after a different counter (e.g. fpu0_active ->
 * bdw__render_basic__eu_fpu_both_active__read); the expansions are
 * reproduced unchanged.
 */

/* [AsyncCompute] GPU Time Elapsed */
#define kblgt2__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* [AsyncCompute] GPU Core Clocks */
#define kblgt2__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [AsyncCompute] AVG GPU Core Frequency */
#define kblgt2__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [AsyncCompute] AVG GPU Core Frequency (__max variant) */
#define kblgt2__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [AsyncCompute] GPU Busy */
#define kblgt2__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [AsyncCompute] VS Threads Dispatched */
#define kblgt2__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* [AsyncCompute] HS Threads Dispatched */
#define kblgt2__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* [AsyncCompute] DS Threads Dispatched */
#define kblgt2__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* [AsyncCompute] GS Threads Dispatched */
#define kblgt2__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* [AsyncCompute] FS Threads Dispatched */
#define kblgt2__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* [AsyncCompute] CS Threads Dispatched */
#define kblgt2__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* [AsyncCompute] EU FPU0 Pipe Active */
#define kblgt2__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* [AsyncCompute] VS FPU0 Pipe Active */
#define kblgt2__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* [AsyncCompute] PS FPU0 Pipe Active */
#define kblgt2__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* [AsyncCompute] CS FPU0 Pipe Active */
#define kblgt2__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [AsyncCompute] EU FPU1 Pipe Active */
#define kblgt2__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* [AsyncCompute] VS FPU1 Pipe Active */
#define kblgt2__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* [AsyncCompute] PS FPU1 Pipe Active */
#define kblgt2__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* [AsyncCompute] CS FPU1 Pipe Active */
#define kblgt2__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [AsyncCompute] EU Thread Occupancy */
#define kblgt2__async_compute__eu_thread_occupancy__read sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* [AsyncCompute] EU Active */
#define kblgt2__async_compute__eu_active__read bdw__render_basic__eu_active__read

/* [AsyncCompute] EU Stall */
#define kblgt2__async_compute__eu_stall__read bdw__render_basic__eu_stall__read

/* kblgt3 "Render Metrics Basic set" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  A few targets are
 * named after a different counter (e.g. l3_misses ->
 * hsw__compute_extended__typed_atomics0__read); the expansions are
 * reproduced unchanged.
 */

/* [Render Metrics Basic set] GPU Time Elapsed */
#define kblgt3__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* [Render Metrics Basic set] GPU Core Clocks */
#define kblgt3__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Render Metrics Basic set] AVG GPU Core Frequency */
#define kblgt3__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Render Metrics Basic set] AVG GPU Core Frequency (__max variant) */
#define kblgt3__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Render Metrics Basic set] VS Threads Dispatched */
#define kblgt3__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* [Render Metrics Basic set] HS Threads Dispatched */
#define kblgt3__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* [Render Metrics Basic set] DS Threads Dispatched */
#define kblgt3__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* [Render Metrics Basic set] GS Threads Dispatched */
#define kblgt3__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* [Render Metrics Basic set] FS Threads Dispatched */
#define kblgt3__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* [Render Metrics Basic set] CS Threads Dispatched */
#define kblgt3__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* [Render Metrics Basic set] GPU Busy */
#define kblgt3__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Render Metrics Basic set] EU Active */
#define kblgt3__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* [Render Metrics Basic set] EU Stall */
#define kblgt3__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* [Render Metrics Basic set] EU Both FPU Pipes Active */
#define kblgt3__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [Render Metrics Basic set] VS FPU0 Pipe Active */
#define kblgt3__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [Render Metrics Basic set] VS FPU1 Pipe Active */
#define kblgt3__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [Render Metrics Basic set] VS Send Pipe Active */
#define kblgt3__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* [Render Metrics Basic set] PS FPU0 Pipe Active */
#define kblgt3__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* [Render Metrics Basic set] PS FPU1 Pipe Active */
#define kblgt3__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* [Render Metrics Basic set] PS Send Pipeline Active */
#define kblgt3__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read

/* [Render Metrics Basic set] FS Both FPU Active */
#define kblgt3__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* [Render Metrics Basic set] Sampler 0 Busy */
#define kblgt3__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read

/* [Render Metrics Basic set] Sampler 1 Busy */
#define kblgt3__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* [Render Metrics Basic set] Samplers Busy */
#define kblgt3__render_basic__samplers_busy__read bdw__render_basic__samplers_busy__read

/* [Render Metrics Basic set] Sampler 0 Bottleneck */
#define kblgt3__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* [Render Metrics Basic set] Sampler 1 Bottleneck */
#define kblgt3__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* [Render Metrics Basic set] Rasterized Pixels */
#define kblgt3__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [Render Metrics Basic set] Early Hi-Depth Test Fails */
#define kblgt3__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [Render Metrics Basic set] Early Depth Test Fails */
#define kblgt3__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [Render Metrics Basic set] Samples Killed in FS */
#define kblgt3__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [Render Metrics Basic set] Pixels Failing Tests */
#define kblgt3__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [Render Metrics Basic set] Samples Written */
#define kblgt3__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* [Render Metrics Basic set] Samples Blended */
#define kblgt3__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* [Render Metrics Basic set] Sampler Texels */
#define kblgt3__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [Render Metrics Basic set] Sampler Texels Misses */
#define kblgt3__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [Render Metrics Basic set] Sampler Cache Misses */
#define kblgt3__render_basic__sampler_l1_misses__read bdw__render_basic__sampler_l1_misses__read

/* [Render Metrics Basic set] SLM Bytes Read */
#define kblgt3__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [Render Metrics Basic set] SLM Bytes Written */
#define kblgt3__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [Render Metrics Basic set] Shader Memory Accesses */
#define kblgt3__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [Render Metrics Basic set] Shader Atomic Memory Accesses */
#define kblgt3__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [Render Metrics Basic set] L3 Lookup Accesses w/o IC */
#define kblgt3__render_basic__l3_lookups__read bdw__render_basic__l3_lookups__read

/* [Render Metrics Basic set] L3 Misses */
#define kblgt3__render_basic__l3_misses__read hsw__compute_extended__typed_atomics0__read

/* [Render Metrics Basic set] L3 Sampler Throughput */
#define kblgt3__render_basic__l3_sampler_throughput__read bdw__render_basic__l3_sampler_throughput__read

/* [Render Metrics Basic set] L3 Shader Throughput */
#define kblgt3__render_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [Render Metrics Basic set] Shader Barrier Messages */
#define kblgt3__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [Render Metrics Basic set] GTI Fixed Pipe Throughput */
#define kblgt3__render_basic__gti_vf_throughput__read bdw__render_basic__gti_vf_throughput__read

/* [Render Metrics Basic set] GTI Depth Throughput */
#define kblgt3__render_basic__gti_depth_throughput__read bdw__render_basic__gti_depth_throughput__read

/* [Render Metrics Basic set] GTI RCC Throughput */
#define kblgt3__render_basic__gti_rcc_throughput__read bdw__render_basic__gti_rcc_throughput__read

/* [Render Metrics Basic set] GTI L3 Throughput */
#define kblgt3__render_basic__gti_l3_throughput__read hsw__render_basic__gti_l3_throughput__read

/* [Render Metrics Basic set] GTI HDC TLB Lookup Throughput */
#define kblgt3__render_basic__gti_hdc_lookups_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/* [Render Metrics Basic set] GTI Read Throughput */
#define kblgt3__render_basic__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read

/* [Render Metrics Basic set] GTI Write Throughput */
#define kblgt3__render_basic__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read

/* [Render Metrics Basic set] Samplers Bottleneck */
#define kblgt3__render_basic__sampler_bottleneck__read bdw__render_basic__sampler_bottleneck__read

/* kblgt3 "Compute Metrics Basic set" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  A few targets are
 * named after a different counter (e.g. gti_write_throughput ->
 * bdw__render_basic__gti_hdc_lookups_throughput__read); the expansions are
 * reproduced unchanged.
 */

/* [Compute Metrics Basic set] GPU Time Elapsed */
#define kblgt3__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* [Compute Metrics Basic set] GPU Core Clocks */
#define kblgt3__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Compute Metrics Basic set] AVG GPU Core Frequency */
#define kblgt3__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Compute Metrics Basic set] AVG GPU Core Frequency (__max variant) */
#define kblgt3__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Compute Metrics Basic set] GPU Busy */
#define kblgt3__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Compute Metrics Basic set] VS Threads Dispatched */
#define kblgt3__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* [Compute Metrics Basic set] HS Threads Dispatched */
#define kblgt3__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* [Compute Metrics Basic set] DS Threads Dispatched */
#define kblgt3__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* [Compute Metrics Basic set] GS Threads Dispatched */
#define kblgt3__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* [Compute Metrics Basic set] FS Threads Dispatched */
#define kblgt3__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* [Compute Metrics Basic set] CS Threads Dispatched */
#define kblgt3__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* [Compute Metrics Basic set] EU Active */
#define kblgt3__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* [Compute Metrics Basic set] EU Stall */
#define kblgt3__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* [Compute Metrics Basic set] EU Both FPU Pipes Active */
#define kblgt3__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* [Compute Metrics Basic set] EU FPU0 Pipe Active */
#define kblgt3__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* [Compute Metrics Basic set] EU FPU1 Pipe Active */
#define kblgt3__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* [Compute Metrics Basic set] EU AVG IPC Rate */
#define kblgt3__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* [Compute Metrics Basic set] EU Send Pipe Active */
#define kblgt3__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* [Compute Metrics Basic set] EU Thread Occupancy */
#define kblgt3__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* [Compute Metrics Basic set] Rasterized Pixels */
#define kblgt3__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [Compute Metrics Basic set] Early Hi-Depth Test Fails */
#define kblgt3__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [Compute Metrics Basic set] Early Depth Test Fails */
#define kblgt3__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* [Compute Metrics Basic set] Samples Killed in FS */
#define kblgt3__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* [Compute Metrics Basic set] Pixels Failing Tests */
#define kblgt3__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* [Compute Metrics Basic set] Samples Written */
#define kblgt3__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* [Compute Metrics Basic set] Samples Blended */
#define kblgt3__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* [Compute Metrics Basic set] Sampler Texels */
#define kblgt3__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* [Compute Metrics Basic set] Sampler Texels Misses */
#define kblgt3__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* [Compute Metrics Basic set] SLM Bytes Read */
#define kblgt3__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* [Compute Metrics Basic set] SLM Bytes Written */
#define kblgt3__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* [Compute Metrics Basic set] Shader Memory Accesses */
#define kblgt3__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* [Compute Metrics Basic set] Shader Atomic Memory Accesses */
#define kblgt3__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* [Compute Metrics Basic set] L3 Shader Throughput */
#define kblgt3__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* [Compute Metrics Basic set] Shader Barrier Messages */
#define kblgt3__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* [Compute Metrics Basic set] Typed Bytes Read */
#define kblgt3__compute_basic__typed_bytes_read__read bdw__compute_basic__typed_bytes_read__read

/* [Compute Metrics Basic set] Typed Bytes Written */
#define kblgt3__compute_basic__typed_bytes_written__read bdw__compute_basic__typed_bytes_written__read

/* [Compute Metrics Basic set] Untyped Bytes Read */
#define kblgt3__compute_basic__untyped_bytes_read__read bdw__compute_basic__untyped_bytes_read__read

/* [Compute Metrics Basic set] Untyped Writes */
#define kblgt3__compute_basic__untyped_bytes_written__read bdw__compute_basic__untyped_bytes_written__read

/* [Compute Metrics Basic set] GTI Read Throughput */
#define kblgt3__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* [Compute Metrics Basic set] GTI Write Throughput */
#define kblgt3__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/* kblgt3 "Render Metrics set for 3D Pipeline Profile" reader aliases.
 *
 * Each macro is a plain object-like alias expanding to the reader symbol
 * written after it (the targets are not defined here).  Note that
 * gs_threads expands to hsw__render_basic__vs_threads__read; the expansion
 * is reproduced unchanged.
 */

/* [Render Metrics set for 3D Pipeline Profile] GPU Time Elapsed */
#define kblgt3__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* [Render Metrics set for 3D Pipeline Profile] GPU Core Clocks */
#define kblgt3__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* [Render Metrics set for 3D Pipeline Profile] AVG GPU Core Frequency */
#define kblgt3__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* [Render Metrics set for 3D Pipeline Profile] AVG GPU Core Frequency (__max variant) */
#define kblgt3__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* [Render Metrics set for 3D Pipeline Profile] GPU Busy */
#define kblgt3__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* [Render Metrics set for 3D Pipeline Profile] VS Threads Dispatched */
#define kblgt3__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* [Render Metrics set for 3D Pipeline Profile] HS Threads Dispatched */
#define kblgt3__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* [Render Metrics set for 3D Pipeline Profile] DS Threads Dispatched */
#define kblgt3__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* [Render Metrics set for 3D Pipeline Profile] GS Threads Dispatched */
#define kblgt3__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* [Render Metrics set for 3D Pipeline Profile] FS Threads Dispatched */
#define kblgt3__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* [Render Metrics set for 3D Pipeline Profile] CS Threads Dispatched */
#define kblgt3__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* [Render Metrics set for 3D Pipeline Profile] EU Active */
#define kblgt3__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* [Render Metrics set for 3D Pipeline Profile] EU Stall */
#define kblgt3__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/* [Render Metrics set for 3D Pipeline Profile] Rasterized Pixels */
#define kblgt3__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* [Render Metrics set for 3D Pipeline Profile] Early Hi-Depth Test Fails */
#define kblgt3__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* [Render Metrics set for 3D Pipeline Profile] Early Depth Test Fails */
#define kblgt3__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define kblgt3__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define kblgt3__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define kblgt3__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define kblgt3__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define kblgt3__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define kblgt3__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define kblgt3__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define kblgt3__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define kblgt3__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define kblgt3__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define kblgt3__render_pipe_profile__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define kblgt3__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/*
 * kblgt3 :: Render Metrics set for 3D Pipeline Profile -- bottleneck counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to a bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): several targets carry sampler names although they serve
 * VS/HS/DS/GS bottleneck metrics; presumably the generator reuses readers
 * with identical equations -- confirm against gen_perf.py before changing
 * any target.
 */

/* VF Bottleneck */
#define kblgt3__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define kblgt3__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define kblgt3__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* DS Bottleneck */
#define kblgt3__render_pipe_profile__ds_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* GS Bottleneck */
#define kblgt3__render_pipe_profile__gs_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* SO Bottleneck */
#define kblgt3__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* Clipper Bottleneck */
#define kblgt3__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* Strip-Fans Bottleneck */
#define kblgt3__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__sf_bottleneck__read

/* Hi-Depth Bottleneck */
#define kblgt3__render_pipe_profile__hi_depth_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Early Depth Bottleneck */
#define kblgt3__render_pipe_profile__early_depth_bottleneck__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* BC Bottleneck */
#define kblgt3__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/*
 * kblgt3 :: Render Metrics set for 3D Pipeline Profile -- stall counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to the same-named
 * bdw__render_pipe_profile__ identifier, expected to be defined earlier in
 * this file.
 */

/* HS Stall */
#define kblgt3__render_pipe_profile__hs_stall__read bdw__render_pipe_profile__hs_stall__read

/* DS Stall */
#define kblgt3__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__ds_stall__read

/* SO Stall */
#define kblgt3__render_pipe_profile__so_stall__read bdw__render_pipe_profile__so_stall__read

/* CL Stall */
#define kblgt3__render_pipe_profile__cl_stall__read bdw__render_pipe_profile__cl_stall__read

/* SF Stall */
#define kblgt3__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_stall__read

/*
 * kblgt3 :: Memory Reads Distribution metrics set -- common counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): a target's name need not match the metric it serves;
 * presumably the generator reuses readers with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define kblgt3__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the reader above) */
#define kblgt3__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define kblgt3__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define kblgt3__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/*
 * kblgt3 :: Memory Reads Distribution metrics set -- GTI read counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): most targets here are named after unrelated
 * compute_extended / memory_reads metrics; presumably the generator reuses
 * readers with identical equations -- confirm against gen_perf.py before
 * changing any target.
 */

/* GtiCmdStreamerMemoryReads */
#define kblgt3__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define kblgt3__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define kblgt3__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define kblgt3__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define kblgt3__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define kblgt3__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define kblgt3__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define kblgt3__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define kblgt3__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define kblgt3__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define kblgt3__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define kblgt3__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define kblgt3__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define kblgt3__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define kblgt3__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * kblgt3 :: Memory Writes Distribution metrics set -- common counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): a target's name need not match the metric it serves;
 * presumably the generator reuses readers with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define kblgt3__memory_writes__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__memory_writes__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__memory_writes__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the reader above) */
#define kblgt3__memory_writes__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__memory_writes__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__memory_writes__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__memory_writes__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__memory_writes__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__memory_writes__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__memory_writes__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__memory_writes__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__memory_writes__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__memory_writes__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define kblgt3__memory_writes__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__memory_writes__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__memory_writes__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__memory_writes__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__memory_writes__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__memory_writes__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__memory_writes__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__memory_writes__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__memory_writes__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__memory_writes__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__memory_writes__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__memory_writes__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__memory_writes__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__memory_writes__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__memory_writes__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/*
 * kblgt3 :: Memory Writes Distribution metrics set -- GTI write counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): the targets are named after compute_extended and
 * memory_reads metrics even though they serve write counters; presumably
 * the generator reuses readers with identical equations -- confirm against
 * gen_perf.py before changing any target.
 */

/* GtiCmdStreamerMemoryWrites */
#define kblgt3__memory_writes__gti_cmd_streamer_memory_writes__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiSoMemoryWrites */
#define kblgt3__memory_writes__gti_so_memory_writes__read hsw__compute_extended__eu_typed_reads0__read

/* GtiRccMemoryWrites */
#define kblgt3__memory_writes__gti_rcc_memory_writes__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryWrites */
#define kblgt3__memory_writes__gti_msc_memory_writes__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryWrites */
#define kblgt3__memory_writes__gti_hiz_memory_writes__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryWrites */
#define kblgt3__memory_writes__gti_stc_memory_writes__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryWrites */
#define kblgt3__memory_writes__gti_rcz_memory_writes__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryWrites */
#define kblgt3__memory_writes__gti_memory_writes__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Writes */
#define kblgt3__memory_writes__gti_l3_bank0_writes__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Writes */
#define kblgt3__memory_writes__gti_l3_bank1_writes__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Writes */
#define kblgt3__memory_writes__gti_l3_bank2_writes__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Writes */
#define kblgt3__memory_writes__gti_l3_bank3_writes__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Writes */
#define kblgt3__memory_writes__gti_l3_writes__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define kblgt3__memory_writes__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * kblgt3 :: Compute Metrics Extended set -- common counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): a target's name need not match the metric it serves
 * (e.g. the EU FPU/send pipe metrics point at vs_* readers); presumably the
 * generator reuses readers with identical equations -- confirm against
 * gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define kblgt3__compute_extended__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__compute_extended__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__compute_extended__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the reader above) */
#define kblgt3__compute_extended__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* CS Threads Dispatched */
#define kblgt3__compute_extended__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__compute_extended__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__compute_extended__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__compute_extended__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define kblgt3__compute_extended__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define kblgt3__compute_extended__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define kblgt3__compute_extended__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define kblgt3__compute_extended__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define kblgt3__compute_extended__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Sampler Texels */
#define kblgt3__compute_extended__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__compute_extended__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__compute_extended__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__compute_extended__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__compute_extended__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__compute_extended__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__compute_extended__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__compute_extended__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/*
 * kblgt3 :: Compute Metrics Extended set -- EU message, typed/untyped and
 * per-cache-line counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): a few targets are named after other metrics (the A64
 * untyped counters and "Typed Reads 0"); presumably the generator reuses
 * readers with identical equations -- confirm against gen_perf.py before
 * changing any target.
 */

/* EuUntypedReads0 */
#define kblgt3__compute_extended__eu_untyped_reads0__read hsw__compute_extended__eu_untyped_reads0__read

/* EuTypedReads0 */
#define kblgt3__compute_extended__eu_typed_reads0__read hsw__compute_extended__eu_typed_reads0__read

/* EuUntypedWrites0 */
#define kblgt3__compute_extended__eu_untyped_writes0__read hsw__compute_extended__eu_untyped_writes0__read

/* EuTypedWrites0 */
#define kblgt3__compute_extended__eu_typed_writes0__read hsw__compute_extended__eu_typed_writes0__read

/* EuUntypedAtomics0 */
#define kblgt3__compute_extended__eu_untyped_atomics0__read hsw__compute_extended__eu_untyped_atomics0__read

/* EuTypedAtomics0 */
#define kblgt3__compute_extended__eu_typed_atomics0__read hsw__compute_extended__eu_typed_atomics0__read

/* EuA64UntypedReads0 */
#define kblgt3__compute_extended__eu_a64_untyped_reads0__read hsw__compute_extended__eu_urb_atomics0__read

/* EuA64UntypedWrites0 */
#define kblgt3__compute_extended__eu_a64_untyped_writes0__read hsw__compute_extended__gpu_clocks__read

/* Typed Reads 0 */
#define kblgt3__compute_extended__typed_reads0__read hsw__render_basic__gpu_core_clocks__read

/* Typed Writes 0 */
#define kblgt3__compute_extended__typed_writes0__read hsw__compute_extended__typed_writes0__read

/* Untyped Reads 0 */
#define kblgt3__compute_extended__untyped_reads0__read hsw__compute_extended__untyped_reads0__read

/* Untyped Writes 0 */
#define kblgt3__compute_extended__untyped_writes0__read hsw__compute_extended__untyped_writes0__read

/* Typed Atomics 0 */
#define kblgt3__compute_extended__typed_atomics0__read hsw__compute_extended__typed_atomics0__read

/* TypedReadsPerCacheLine */
#define kblgt3__compute_extended__typed_reads_per_cache_line__read hsw__compute_extended__typed_reads_per_cache_line__read

/* TypedWritesPerCacheLine */
#define kblgt3__compute_extended__typed_writes_per_cache_line__read hsw__compute_extended__typed_writes_per_cache_line__read

/* UntypedReadsPerCacheLine */
#define kblgt3__compute_extended__untyped_reads_per_cache_line__read bdw__compute_extended__untyped_reads_per_cache_line__read

/* UntypedWritesPerCacheLine */
#define kblgt3__compute_extended__untyped_writes_per_cache_line__read bdw__compute_extended__untyped_writes_per_cache_line__read

/* TypedAtomicsPerCacheLine */
#define kblgt3__compute_extended__typed_atomics_per_cache_line__read hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * kblgt3 :: Compute Metrics L3 Cache set -- common GPU, thread and EU
 * counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to an hsw__/bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): a target's name need not match the metric it serves;
 * presumably the generator reuses readers with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define kblgt3__compute_l3_cache__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__compute_l3_cache__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__compute_l3_cache__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the reader above) */
#define kblgt3__compute_l3_cache__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__compute_l3_cache__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__compute_l3_cache__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__compute_l3_cache__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__compute_l3_cache__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__compute_l3_cache__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__compute_l3_cache__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__compute_l3_cache__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__compute_l3_cache__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__compute_l3_cache__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__compute_l3_cache__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define kblgt3__compute_l3_cache__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define kblgt3__compute_l3_cache__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define kblgt3__compute_l3_cache__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define kblgt3__compute_l3_cache__eu_send_active__read bdw__render_basic__vs_send_active__read

/*
 * kblgt3 :: Compute Metrics L3 Cache set -- EU FPU instruction counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to a bdw__
 * identifier expected to be defined earlier in this file.
 * NOTE(review): the ternary/binary instruction metrics point at
 * render_basic ps_* readers; presumably the generator reuses readers with
 * identical equations -- confirm against gen_perf.py before changing any
 * target.
 */

/* EU FPU0 Hybrid Instruction */
#define kblgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* EU FPU1 Hybrid Instruction */
#define kblgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* EU FPU0 Ternary Instruction */
#define kblgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read bdw__render_basic__ps_fpu0_active__read

/* EU FPU1 Ternary Instruction */
#define kblgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read bdw__render_basic__ps_fpu1_active__read

/* EU FPU0 Binary Instruction */
#define kblgt3__compute_l3_cache__eu_binary_fpu0_instruction__read bdw__render_basic__ps_send_active__read

/* EU FPU1 Binary Instruction */
#define kblgt3__compute_l3_cache__eu_binary_fpu1_instruction__read bdw__render_basic__ps_eu_both_fpu_active__read

/* EU FPU0 Move Instruction */
#define kblgt3__compute_l3_cache__eu_move_fpu0_instruction__read bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* EU FPU1 Move Instruction */
#define kblgt3__compute_l3_cache__eu_move_fpu1_instruction__read bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/*
 * kblgt3 :: Compute Metrics L3 Cache set -- pixel, sample, sampler and SLM
 * counters.
 *
 * Pure preprocessor aliases: each kblgt3 name expands to a bdw__
 * identifier expected to be defined earlier in this file.
 */

/* Rasterized Pixels */
#define kblgt3__compute_l3_cache__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__compute_l3_cache__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__compute_l3_cache__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__compute_l3_cache__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__compute_l3_cache__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__compute_l3_cache__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__compute_l3_cache__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Accesses */
#define kblgt3__compute_l3_cache__sampler_accesses__read bdw__render_pipe_profile__sampler_accesses__read

/* Sampler Texels */
#define kblgt3__compute_l3_cache__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__compute_l3_cache__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__compute_l3_cache__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__compute_l3_cache__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define kblgt3__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define kblgt3__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define kblgt3__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define kblgt3__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define kblgt3__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define kblgt3__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define kblgt3__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define kblgt3__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define kblgt3__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define kblgt3__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define kblgt3__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define kblgt3__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define kblgt3__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define kblgt3__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define kblgt3__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define kblgt3__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define kblgt3__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * kblgt3 :: Metric set HDCAndSF -- counter reader aliases.
 *
 * Every macro in this table makes a kblgt3 reader name expand to a reader
 * symbol carrying a bdw__ or hsw__ prefix.
 *
 * NOTE(review): some targets are named after a different counter
 * (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, poly_data_ready -> bdw sampler0_busy), and the
 * "HDC stalled by L3" ss0/ss2 entries expand to the bdw shader02/shader00
 * readers respectively.  This file is generated, so presumably the
 * generator reuses any reader whose equation is identical -- confirm in
 * the generator before changing a target.
 */

/* GPU Time Elapsed */
#define kblgt3__hdc_and_sf__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__hdc_and_sf__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__hdc_and_sf__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt3__hdc_and_sf__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__hdc_and_sf__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__hdc_and_sf__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__hdc_and_sf__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__hdc_and_sf__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__hdc_and_sf__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__hdc_and_sf__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__hdc_and_sf__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__hdc_and_sf__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__hdc_and_sf__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__hdc_and_sf__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__hdc_and_sf__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__hdc_and_sf__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__hdc_and_sf__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__hdc_and_sf__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__hdc_and_sf__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__hdc_and_sf__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__hdc_and_sf__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__hdc_and_sf__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__hdc_and_sf__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__hdc_and_sf__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__hdc_and_sf__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__hdc_and_sf__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__hdc_and_sf__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__hdc_and_sf__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__hdc_and_sf__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__hdc_and_sf__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__hdc_and_sf__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__hdc_and_sf__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__hdc_and_sf__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__hdc_and_sf__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__hdc_and_sf__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__hdc_and_sf__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Polygon Data Ready */
#define kblgt3__hdc_and_sf__poly_data_ready__read bdw__render_basic__sampler0_busy__read

/* HDC stalled by L3 (s0.ss0) */
#define kblgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* HDC stalled by L3 (s0.ss1) */
#define kblgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* HDC stalled by L3 (s0.ss2) */
#define kblgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* SQ is full */
#define kblgt3__hdc_and_sf__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt3 :: Metric set L3_1 -- counter reader aliases.
 *
 * Every macro in this table makes a kblgt3 reader name expand to a reader
 * symbol carrying a bdw__ or hsw__ prefix.
 *
 * NOTE(review): some targets are named after a different counter
 * (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, the l30_bank* counters -> bdw sampler0_busy and
 * bdw render_pipe_profile *_bottleneck readers).  This file is generated,
 * so presumably the generator reuses any reader whose equation is
 * identical -- confirm in the generator before changing a target.
 */

/* GPU Time Elapsed */
#define kblgt3__l3_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__l3_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__l3_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt3__l3_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__l3_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__l3_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__l3_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__l3_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__l3_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__l3_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__l3_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__l3_1__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__l3_1__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__l3_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__l3_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__l3_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__l3_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__l3_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__l3_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__l3_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__l3_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__l3_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__l3_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__l3_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__l3_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__l3_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__l3_1__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__l3_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__l3_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__l3_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__l3_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__l3_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__l3_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__l3_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__l3_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__l3_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 L3 Bank0 Stalled */
#define kblgt3__l3_1__l30_bank0_stalled__read bdw__render_basic__sampler0_busy__read

/* Slice0 L3 Bank1 Stalled */
#define kblgt3__l3_1__l30_bank1_stalled__read bdw__render_pipe_profile__so_bottleneck__read

/* Slice0 L3 Bank1 Active */
#define kblgt3__l3_1__l30_bank1_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* Slice0 L3 Bank0 Active */
#define kblgt3__l3_1__l30_bank0_active__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* SQ is full */
#define kblgt3__l3_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt3 :: Metric set L3_2 -- counter reader aliases.
 *
 * Every macro in this table makes a kblgt3 reader name expand to a reader
 * symbol carrying a bdw__ or hsw__ prefix.
 *
 * NOTE(review): some targets are named after a different counter
 * (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank2_stalled -> bdw sampler0_busy,
 * l30_bank2_active -> bdw render_pipe_profile bc_bottleneck).  This file
 * is generated, so presumably the generator reuses any reader whose
 * equation is identical -- confirm in the generator before changing a
 * target.
 */

/* GPU Time Elapsed */
#define kblgt3__l3_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt3__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__l3_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__l3_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__l3_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__l3_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__l3_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__l3_2__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__l3_2__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__l3_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__l3_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__l3_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__l3_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__l3_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__l3_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__l3_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__l3_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__l3_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__l3_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__l3_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__l3_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__l3_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__l3_2__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__l3_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__l3_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__l3_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__l3_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__l3_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__l3_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__l3_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__l3_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__l3_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 L3 Bank2 Stalled */
#define kblgt3__l3_2__l30_bank2_stalled__read bdw__render_basic__sampler0_busy__read

/* Slice0 L3 Bank2 Active */
#define kblgt3__l3_2__l30_bank2_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* SQ is full */
#define kblgt3__l3_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt3 :: Metric set L3_3 -- counter reader aliases.
 *
 * Every macro in this table makes a kblgt3 reader name expand to a reader
 * symbol carrying a bdw__ or hsw__ prefix.
 *
 * NOTE(review): some targets are named after a different counter
 * (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, l30_bank3_stalled -> bdw sampler0_busy,
 * l30_bank3_active -> bdw render_pipe_profile bc_bottleneck).  This file
 * is generated, so presumably the generator reuses any reader whose
 * equation is identical -- confirm in the generator before changing a
 * target.
 */

/* GPU Time Elapsed */
#define kblgt3__l3_3__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt3__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__l3_3__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__l3_3__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__l3_3__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__l3_3__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__l3_3__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__l3_3__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__l3_3__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__l3_3__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__l3_3__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__l3_3__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__l3_3__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__l3_3__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__l3_3__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__l3_3__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__l3_3__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__l3_3__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__l3_3__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__l3_3__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__l3_3__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__l3_3__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__l3_3__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__l3_3__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__l3_3__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__l3_3__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__l3_3__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__l3_3__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__l3_3__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__l3_3__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__l3_3__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__l3_3__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 L3 Bank3 Stalled */
#define kblgt3__l3_3__l30_bank3_stalled__read bdw__render_basic__sampler0_busy__read

/* Slice0 L3 Bank3 Active */
#define kblgt3__l3_3__l30_bank3_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* SQ is full */
#define kblgt3__l3_3__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * kblgt3 :: Metric set RasterizerAndPixelBackend -- counter reader
 * aliases (entries from "GPU Time Elapsed" through "Slice0 Post-EarlyZ
 * Pixel Data Ready").
 *
 * Every macro in this table makes a kblgt3 reader name expand to a reader
 * symbol carrying a bdw__ or hsw__ prefix.
 *
 * NOTE(review): some targets are named after a different counter
 * (gs_threads -> hsw vs_threads, shader_barriers -> hsw
 * early_depth_test_fails, pixel_data0_ready -> bdw sampler0_busy).  This
 * file is generated, so presumably the generator reuses any reader whose
 * equation is identical -- confirm in the generator before changing a
 * target.
 */

/* GPU Time Elapsed */
#define kblgt3__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define kblgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define kblgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Post-EarlyZ Pixel Data Ready */
#define kblgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define kblgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define kblgt3__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define kblgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define kblgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define kblgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt3 / Metric set Sampler: every counter callback is a plain macro alias
 * for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): some targets are named after a different counter or platform;
 * presumably the generator reuses an earlier function with an identical body --
 * confirm against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__sampler__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__sampler__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Subslice1 Input Available */
#define kblgt3__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Slice0 Subslice2 Input Available */
#define kblgt3__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Slice0 Subslice0 Input Available */
#define kblgt3__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 Subslice2 Sampler Output Ready */
#define kblgt3__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Slice0 Subslice0 Sampler Output Ready */
#define kblgt3__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Slice0 Subslice1 Sampler Output Ready */
#define kblgt3__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* SQ is full */
#define kblgt3__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt3 / Metric set TDL_1: every counter callback is a plain macro alias
 * for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): some targets are named after a different counter or platform;
 * presumably the generator reuses an earlier function with an identical body --
 * confirm against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define kblgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define kblgt3__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define kblgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define kblgt3__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define kblgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define kblgt3__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* SQ is full */
#define kblgt3__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt3 / Metric set TDL_2: every counter callback is a plain macro alias
 * for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): some targets are named after a different counter or platform;
 * presumably the generator reuses an earlier function with an identical body --
 * confirm against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define kblgt3__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define kblgt3__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define kblgt3__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define kblgt3__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define kblgt3__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define kblgt3__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define kblgt3__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define kblgt3__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define kblgt3__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define kblgt3__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define kblgt3__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define kblgt3__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define kblgt3__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define kblgt3__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define kblgt3__tdl_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define kblgt3__tdl_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define kblgt3__tdl_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define kblgt3__tdl_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define kblgt3__tdl_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define kblgt3__tdl_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Thread Header Ready on Slice0 Subslice1 Port 0 */
#define kblgt3__tdl_2__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* Thread Header Ready on Slice0 Subslice0 Port 1 */
#define kblgt3__tdl_2__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Thread Header Ready on Slice0 Subslice0 Port 0 */
#define kblgt3__tdl_2__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* Thread Header Ready on Slice0 Subslice2 Port 1 */
#define kblgt3__tdl_2__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* Thread Header Ready on Slice0 Subslice2 Port 0 */
#define kblgt3__tdl_2__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* Thread Header Ready on Slice0 Subslice1 Port 1 */
#define kblgt3__tdl_2__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* SQ is full */
#define kblgt3__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/* kblgt3 / Compute Metrics Extra set: every counter callback is a plain macro
 * alias for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): fpu1_active resolves to a function named after eu_stall;
 * presumably the generator reuses an earlier function with an identical body --
 * confirm against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EU FPU1 Pipe Active */
#define kblgt3__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* EU FPU1 Pipe Active including Ext Math */
#define kblgt3__compute_extra__fpu1_active_adjusted__read bdw__compute_extra__fpu1_active_adjusted__read

/* kblgt3 / Media Vme Pipe metrics set: every counter callback is a plain
 * macro alias for an already-defined function; the comment above each alias
 * is the counter's display name.
 */

/* GPU Time Elapsed */
#define kblgt3__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read

/* CS Threads Dispatched */
#define kblgt3__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define kblgt3__vme_pipe__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define kblgt3__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU Thread Occupancy */
#define kblgt3__vme_pipe__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VME Busy */
#define kblgt3__vme_pipe__vme_busy__read bdw__vme_pipe__vme_busy__read

/* kblgt3 / Gpu Rings Busyness: every counter callback is a plain macro alias
 * for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): the ring-busy aliases resolve to functions named after
 * render-pipe bottleneck/stall counters; presumably the generator reuses an
 * earlier function with an identical body -- confirm against the generator
 * before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Render Ring Busy */
#define kblgt3__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Vdbox0 Ring Busy */
#define kblgt3__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Vdbox1 Ring Busy */
#define kblgt3__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__sf_stall__read

/* Vebox Ring Busy */
#define kblgt3__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__cl_stall__read

/* Blitter Ring Busy */
#define kblgt3__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__so_stall__read

/* AnyRingBusy */
#define kblgt3__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/* kblgt3 / MDAPI testing set: every counter callback is a plain macro alias
 * for an already-defined function; the comment above each alias is the
 * counter's display name.
 * NOTE(review): the TestCounterN aliases resolve to hsw compute/memory
 * functions; presumably the generator reuses an earlier function with an
 * identical body -- confirm against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TestCounter0 */
#define kblgt3__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* TestCounter1 */
#define kblgt3__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* TestCounter2 */
#define kblgt3__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* TestCounter3 */
#define kblgt3__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* TestCounter4 */
#define kblgt3__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* TestCounter5 */
#define kblgt3__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* TestCounter6 */
#define kblgt3__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* TestCounter7 */
#define kblgt3__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* TestCounter8 */
#define kblgt3__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/* kblgt3 / Metric set PMA Stall: every counter callback is a plain macro
 * alias for an already-defined function; the comment above each alias is the
 * counter's display name.
 */

/* GPU Time Elapsed */
#define kblgt3__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* STC PMA stall */
#define kblgt3__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/* kblgt3 / AsyncCompute: every counter callback is a plain macro alias for an
 * already-defined function; the comment above each alias is the counter's
 * display name.
 * NOTE(review): several FPU aliases resolve to functions named after a
 * different counter (e.g. ps_fpu0_active -> vs_fpu1_active); presumably the
 * generator reuses an earlier function with an identical body -- confirm
 * against the generator before hand-editing.
 */

/* GPU Time Elapsed */
#define kblgt3__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define kblgt3__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read callback) */
#define kblgt3__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max callback) */
#define kblgt3__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define kblgt3__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define kblgt3__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define kblgt3__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define kblgt3__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define kblgt3__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define kblgt3__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define kblgt3__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU0 Pipe Active */
#define kblgt3__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define kblgt3__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* PS FPU0 Pipe Active */
#define kblgt3__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* CS FPU0 Pipe Active */
#define kblgt3__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* EU FPU1 Pipe Active */
#define kblgt3__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define kblgt3__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* PS FPU1 Pipe Active */
#define kblgt3__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* CS FPU1 Pipe Active */
#define kblgt3__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* EU Thread Occupancy */
#define kblgt3__async_compute__eu_thread_occupancy__read sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* EU Active */
#define kblgt3__async_compute__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define kblgt3__async_compute__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt2 "Render Metrics Basic set": time, clock, thread-dispatch and EU
 * active/stall counters.  Each macro is a pure alias that expands to the name
 * of a reader carrying another platform / metric-set prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt2__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion) */
#define cflgt2__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched */
#define cflgt2__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy */
#define cflgt2__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define cflgt2__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt2 "Render Metrics Basic set": FPU / send pipe activity and sampler
 * busy/bottleneck counters.  Each macro is a pure alias that expands to the
 * name of the matching bdw__render_basic reader.
 */

/* EU Both FPU Pipes Active */
#define cflgt2__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler 0 Busy */
#define cflgt2__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler 1 Busy */
#define cflgt2__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy */
#define cflgt2__render_basic__samplers_busy__read bdw__render_basic__samplers_busy__read

/* Sampler 0 Bottleneck */
#define cflgt2__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler 1 Bottleneck */
#define cflgt2__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/*
 * cflgt2 "Render Metrics Basic set": pixel/sample, sampler texel, SLM and
 * shader memory counters.  Each macro is a pure alias that expands to the name
 * of a reader carrying another platform prefix.
 */

/* Rasterized Pixels */
#define cflgt2__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler Cache Misses */
#define cflgt2__render_basic__sampler_l1_misses__read sklgt2__render_basic__sampler_l1_misses__read

/* SLM Bytes Read */
#define cflgt2__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/*
 * cflgt2 "Render Metrics Basic set": L3, shader barrier, GTI throughput and
 * combined sampler bottleneck counters.  Each macro is a pure alias that
 * expands to the name of a reader carrying another platform / metric-set
 * prefix.
 */

/* L3 Lookup Accesses w/o IC */
#define cflgt2__render_basic__l3_lookups__read sklgt2__render_basic__l3_lookups__read

/* L3 Misses */
#define cflgt2__render_basic__l3_misses__read hsw__compute_extended__typed_atomics0__read

/* L3 Sampler Throughput */
#define cflgt2__render_basic__l3_sampler_throughput__read sklgt2__render_basic__l3_sampler_throughput__read

/* L3 Shader Throughput */
#define cflgt2__render_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Fixed Pipe Throughput */
#define cflgt2__render_basic__gti_vf_throughput__read bdw__render_basic__gti_vf_throughput__read

/* GTI Depth Throughput */
#define cflgt2__render_basic__gti_depth_throughput__read bdw__render_basic__gti_depth_throughput__read

/* GTI RCC Throughput */
#define cflgt2__render_basic__gti_rcc_throughput__read bdw__render_basic__gti_rcc_throughput__read

/* GTI L3 Throughput */
#define cflgt2__render_basic__gti_l3_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI HDC TLB Lookup Throughput */
#define cflgt2__render_basic__gti_hdc_lookups_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/* GTI Read Throughput */
#define cflgt2__render_basic__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define cflgt2__render_basic__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read

/* Samplers Bottleneck */
#define cflgt2__render_basic__sampler_bottleneck__read bdw__render_basic__sampler_bottleneck__read

/*
 * cflgt2 "Compute Metrics Basic set": time, clock, thread-dispatch and EU
 * activity counters.  Each macro is a pure alias that expands to the name of
 * a reader carrying another platform / metric-set prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt2__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion) */
#define cflgt2__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define cflgt2__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt2__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define cflgt2__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define cflgt2__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define cflgt2__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/*
 * cflgt2 "Compute Metrics Basic set": pixel/sample, SLM, shader memory,
 * typed/untyped byte and GTI throughput counters.  Each macro is a pure alias
 * that expands to the name of a reader carrying another platform /
 * metric-set prefix.
 */

/* Rasterized Pixels */
#define cflgt2__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Typed Bytes Read */
#define cflgt2__compute_basic__typed_bytes_read__read bdw__compute_basic__typed_bytes_read__read

/* Typed Bytes Written */
#define cflgt2__compute_basic__typed_bytes_written__read bdw__compute_basic__typed_bytes_written__read

/* Untyped Bytes Read */
#define cflgt2__compute_basic__untyped_bytes_read__read bdw__compute_basic__untyped_bytes_read__read

/* Untyped Writes */
#define cflgt2__compute_basic__untyped_bytes_written__read bdw__compute_basic__untyped_bytes_written__read

/* GTI Read Throughput */
#define cflgt2__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI Write Throughput */
#define cflgt2__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * cflgt2 "Render Metrics set for 3D Pipeline Profile": time, clock,
 * thread-dispatch and EU active/stall counters.  Each macro is a pure alias
 * that expands to the name of a reader carrying another platform /
 * metric-set prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt2__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion) */
#define cflgt2__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt2 "Render Metrics set for 3D Pipeline Profile": pixel/sample, sampler
 * texel, SLM and shader memory counters.  Each macro is a pure alias that
 * expands to the name of a reader carrying another platform prefix.
 */

/* Rasterized Pixels */
#define cflgt2__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__render_pipe_profile__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/*
 * cflgt2 "Render Metrics set for 3D Pipeline Profile": per-stage bottleneck
 * and stall counters.  Each macro is a pure alias that expands to the name of
 * a bdw reader; note that several targets carry a render_basic sampler name
 * rather than a render_pipe_profile one.
 */

/* VF Bottleneck */
#define cflgt2__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define cflgt2__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define cflgt2__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* DS Bottleneck */
#define cflgt2__render_pipe_profile__ds_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* GS Bottleneck */
#define cflgt2__render_pipe_profile__gs_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* SO Bottleneck */
#define cflgt2__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* Clipper Bottleneck */
#define cflgt2__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* Strip-Fans Bottleneck */
#define cflgt2__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__sf_bottleneck__read

/* Hi-Depth Bottleneck */
#define cflgt2__render_pipe_profile__hi_depth_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Early Depth Bottleneck */
#define cflgt2__render_pipe_profile__early_depth_bottleneck__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* BC Bottleneck */
#define cflgt2__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* HS Stall */
#define cflgt2__render_pipe_profile__hs_stall__read bdw__render_pipe_profile__hs_stall__read

/* DS Stall */
#define cflgt2__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__ds_stall__read

/* SO Stall */
#define cflgt2__render_pipe_profile__so_stall__read bdw__render_pipe_profile__so_stall__read

/* CL Stall */
#define cflgt2__render_pipe_profile__cl_stall__read bdw__render_pipe_profile__cl_stall__read

/* SF Stall */
#define cflgt2__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_stall__read

/*
 * cflgt2 "Memory Reads Distribution metrics set": time, clock,
 * thread-dispatch and EU active/stall counters.  Each macro is a pure alias
 * that expands to the name of a reader carrying another platform /
 * metric-set prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt2__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion) */
#define cflgt2__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt2 "Memory Reads Distribution metrics set": pixel/sample, sampler
 * texel, SLM and shader memory counters.  Each macro is a pure alias that
 * expands to the name of a reader carrying another platform prefix.
 */

/* Rasterized Pixels */
#define cflgt2__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define cflgt2__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/*
 * cflgt2 "Memory Reads Distribution metrics set": GTI memory-read, L3 bank
 * and ring access counters.  Each macro is a pure alias; most targets carry
 * hsw__compute_extended / hsw__memory_reads names that differ from the
 * counter being aliased.
 */

/* GtiCmdStreamerMemoryReads */
#define cflgt2__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define cflgt2__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define cflgt2__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define cflgt2__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define cflgt2__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define cflgt2__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define cflgt2__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define cflgt2__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define cflgt2__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define cflgt2__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define cflgt2__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define cflgt2__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define cflgt2__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define cflgt2__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define cflgt2__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * cflgt2 "Memory Writes Distribution metrics set": time, clock,
 * thread-dispatch and EU active/stall counters.  Each macro is a pure alias
 * that expands to the name of a reader carrying another platform /
 * metric-set prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__memory_writes__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__memory_writes__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt2__memory_writes__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion) */
#define cflgt2__memory_writes__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__memory_writes__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__memory_writes__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__memory_writes__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__memory_writes__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__memory_writes__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__memory_writes__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__memory_writes__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__memory_writes__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__memory_writes__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt2 "Memory Writes Distribution metrics set": pixel/sample, sampler
 * texel, SLM and shader memory counters.  Each macro is a pure alias that
 * expands to the name of a reader carrying another platform prefix.
 */

/* Rasterized Pixels */
#define cflgt2__memory_writes__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__memory_writes__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__memory_writes__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__memory_writes__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__memory_writes__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__memory_writes__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__memory_writes__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__memory_writes__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__memory_writes__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__memory_writes__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__memory_writes__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__memory_writes__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__memory_writes__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__memory_writes__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__memory_writes__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Memory Writes Distribution metrics set :: GtiCmdStreamerMemoryWrites */
#define cflgt2__memory_writes__gti_cmd_streamer_memory_writes__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Memory Writes Distribution metrics set :: GtiSoMemoryWrites */
#define cflgt2__memory_writes__gti_so_memory_writes__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Memory Writes Distribution metrics set :: GtiRccMemoryWrites */
#define cflgt2__memory_writes__gti_rcc_memory_writes__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiMscMemoryWrites */
#define cflgt2__memory_writes__gti_msc_memory_writes__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Memory Writes Distribution metrics set :: GtiHizMemoryWrites */
#define cflgt2__memory_writes__gti_hiz_memory_writes__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiStcMemoryWrites */
#define cflgt2__memory_writes__gti_stc_memory_writes__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define cflgt2__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define cflgt2__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define cflgt2__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define cflgt2__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define cflgt2__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define cflgt2__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define cflgt2__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define cflgt2__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * cflgt2 aliases for the "Compute Metrics Extended set".
 *
 * Each macro only renames an accessor that carries another prefix (hsw__ or
 * bdw__); nothing is computed here.
 *
 * NOTE(review): some targets are named after a different counter (for
 * example typed_reads0 -> hsw render_basic gpu_core_clocks, or
 * eu_a64_untyped_writes0 -> hsw compute_extended gpu_clocks). Presumably the
 * generator reuses an existing accessor whenever it is equivalent -- confirm
 * against gen_perf.py before changing any target by hand.
 */

/* Compute Metrics Extended set :: GPU Time Elapsed */
#define cflgt2__compute_extended__gpu_time__read hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks */
#define cflgt2__compute_extended__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (__read alias) */
#define cflgt2__compute_extended__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (__max alias) */
#define cflgt2__compute_extended__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended set :: CS Threads Dispatched */
#define cflgt2__compute_extended__cs_threads__read bdw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EU Active */
#define cflgt2__compute_extended__eu_active__read bdw__render_basic__eu_active__read

/* Compute Metrics Extended set :: EU Stall */
#define cflgt2__compute_extended__eu_stall__read bdw__render_basic__eu_stall__read

/* Compute Metrics Extended set :: EU Both FPU Pipes Active */
#define cflgt2__compute_extended__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended set :: EU FPU0 Pipe Active */
#define cflgt2__compute_extended__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended set :: EU FPU1 Pipe Active */
#define cflgt2__compute_extended__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended set :: EU AVG IPC Rate */
#define cflgt2__compute_extended__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended set :: EU Send Pipe Active */
#define cflgt2__compute_extended__eu_send_active__read bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended set :: EU Thread Occupancy */
#define cflgt2__compute_extended__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended set :: Sampler Texels */
#define cflgt2__compute_extended__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended set :: Sampler Texels Misses */
#define cflgt2__compute_extended__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended set :: SLM Bytes Read */
#define cflgt2__compute_extended__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended set :: SLM Bytes Written */
#define cflgt2__compute_extended__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended set :: Shader Memory Accesses */
#define cflgt2__compute_extended__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended set :: Shader Atomic Memory Accesses */
#define cflgt2__compute_extended__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended set :: L3 Shader Throughput */
#define cflgt2__compute_extended__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended set :: Shader Barrier Messages */
#define cflgt2__compute_extended__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended set :: EuUntypedReads0 */
#define cflgt2__compute_extended__eu_untyped_reads0__read hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended set :: EuTypedReads0 */
#define cflgt2__compute_extended__eu_typed_reads0__read hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended set :: EuUntypedWrites0 */
#define cflgt2__compute_extended__eu_untyped_writes0__read hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended set :: EuTypedWrites0 */
#define cflgt2__compute_extended__eu_typed_writes0__read hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended set :: EuUntypedAtomics0 */
#define cflgt2__compute_extended__eu_untyped_atomics0__read hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended set :: EuTypedAtomics0 */
#define cflgt2__compute_extended__eu_typed_atomics0__read hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedReads0 */
#define cflgt2__compute_extended__eu_a64_untyped_reads0__read hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedWrites0 */
#define cflgt2__compute_extended__eu_a64_untyped_writes0__read hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended set :: Typed Reads 0 */
#define cflgt2__compute_extended__typed_reads0__read hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0 */
#define cflgt2__compute_extended__typed_writes0__read hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended set :: Untyped Reads 0 */
#define cflgt2__compute_extended__untyped_reads0__read hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended set :: Untyped Writes 0 */
#define cflgt2__compute_extended__untyped_writes0__read hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended set :: Typed Atomics 0 */
#define cflgt2__compute_extended__typed_atomics0__read hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended set :: TypedReadsPerCacheLine */
#define cflgt2__compute_extended__typed_reads_per_cache_line__read hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended set :: TypedWritesPerCacheLine */
#define cflgt2__compute_extended__typed_writes_per_cache_line__read hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine */
#define cflgt2__compute_extended__untyped_reads_per_cache_line__read bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine */
#define cflgt2__compute_extended__untyped_writes_per_cache_line__read bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine */
#define cflgt2__compute_extended__typed_atomics_per_cache_line__read hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * cflgt2 aliases for the "Compute Metrics L3 Cache set".
 *
 * Each macro only renames an accessor that carries another prefix (hsw__,
 * bdw__, chv__ or sklgt2__); nothing is computed here.
 *
 * NOTE(review): some targets are named after a different counter (for
 * example the ternary/binary FPU instruction counters -> bdw render_basic
 * ps_* accessors). Presumably the generator reuses an existing accessor
 * whenever it is equivalent -- confirm against gen_perf.py before changing
 * any target by hand.
 */

/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define cflgt2__compute_l3_cache__gpu_time__read hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define cflgt2__compute_l3_cache__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__read alias) */
#define cflgt2__compute_l3_cache__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__max alias) */
#define cflgt2__compute_l3_cache__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define cflgt2__compute_l3_cache__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define cflgt2__compute_l3_cache__vs_threads__read bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define cflgt2__compute_l3_cache__hs_threads__read bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define cflgt2__compute_l3_cache__ds_threads__read bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define cflgt2__compute_l3_cache__gs_threads__read hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define cflgt2__compute_l3_cache__ps_threads__read bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define cflgt2__compute_l3_cache__cs_threads__read bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define cflgt2__compute_l3_cache__eu_active__read bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define cflgt2__compute_l3_cache__eu_stall__read bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define cflgt2__compute_l3_cache__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define cflgt2__compute_l3_cache__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define cflgt2__compute_l3_cache__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define cflgt2__compute_l3_cache__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define cflgt2__compute_l3_cache__eu_send_active__read bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define cflgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define cflgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define cflgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define cflgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define cflgt2__compute_l3_cache__eu_binary_fpu0_instruction__read bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define cflgt2__compute_l3_cache__eu_binary_fpu1_instruction__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define cflgt2__compute_l3_cache__eu_move_fpu0_instruction__read bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define cflgt2__compute_l3_cache__eu_move_fpu1_instruction__read bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define cflgt2__compute_l3_cache__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define cflgt2__compute_l3_cache__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define cflgt2__compute_l3_cache__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define cflgt2__compute_l3_cache__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define cflgt2__compute_l3_cache__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define cflgt2__compute_l3_cache__samples_written__read bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define cflgt2__compute_l3_cache__samples_blended__read bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define cflgt2__compute_l3_cache__sampler_accesses__read bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define cflgt2__compute_l3_cache__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define cflgt2__compute_l3_cache__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define cflgt2__compute_l3_cache__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define cflgt2__compute_l3_cache__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define cflgt2__compute_l3_cache__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define cflgt2__compute_l3_cache__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define cflgt2__compute_l3_cache__l3_accesses__read sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define cflgt2__compute_l3_cache__l3_misses__read bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define cflgt2__compute_l3_cache__l3_sampler_throughput__read chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define cflgt2__compute_l3_cache__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define cflgt2__compute_l3_cache__l3_total_throughput__read sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define cflgt2__compute_l3_cache__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define cflgt2__compute_l3_cache__l3_bank00_accesses__read bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define cflgt2__compute_l3_cache__l3_bank01_accesses__read bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define cflgt2__compute_l3_cache__l3_bank02_accesses__read bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define cflgt2__compute_l3_cache__l3_bank03_accesses__read bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define cflgt2__compute_l3_cache__l3_bank00_ic_accesses__read bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define cflgt2__compute_l3_cache__l3_bank00_ic_hits__read bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define cflgt2__compute_l3_cache__gti_l3_throughput__read bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define cflgt2__compute_l3_cache__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define cflgt2__compute_l3_cache__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read

/*
 * cflgt2 aliases for "Metric set HDCAndSF".
 *
 * Each macro only renames an accessor that carries another prefix (hsw__ or
 * bdw__); nothing is computed here.
 *
 * NOTE(review): the s0.ss0 and s0.ss2 "HDC stalled by L3" aliases point at
 * the bdw shader02 and shader00 accessors respectively (cross-mapped), and
 * poly_data_ready points at bdw render_basic sampler0_busy. These are kept
 * exactly as generated; presumably the generator reuses an existing accessor
 * whenever it is equivalent -- confirm against gen_perf.py before changing
 * any target by hand.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define cflgt2__hdc_and_sf__gpu_time__read hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define cflgt2__hdc_and_sf__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__read alias) */
#define cflgt2__hdc_and_sf__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max alias) */
#define cflgt2__hdc_and_sf__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define cflgt2__hdc_and_sf__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define cflgt2__hdc_and_sf__vs_threads__read bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define cflgt2__hdc_and_sf__hs_threads__read bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define cflgt2__hdc_and_sf__ds_threads__read bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define cflgt2__hdc_and_sf__gs_threads__read hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define cflgt2__hdc_and_sf__ps_threads__read bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define cflgt2__hdc_and_sf__cs_threads__read bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define cflgt2__hdc_and_sf__eu_active__read bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define cflgt2__hdc_and_sf__eu_stall__read bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define cflgt2__hdc_and_sf__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define cflgt2__hdc_and_sf__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define cflgt2__hdc_and_sf__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define cflgt2__hdc_and_sf__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define cflgt2__hdc_and_sf__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define cflgt2__hdc_and_sf__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define cflgt2__hdc_and_sf__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define cflgt2__hdc_and_sf__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define cflgt2__hdc_and_sf__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define cflgt2__hdc_and_sf__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define cflgt2__hdc_and_sf__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define cflgt2__hdc_and_sf__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define cflgt2__hdc_and_sf__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define cflgt2__hdc_and_sf__samples_written__read bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define cflgt2__hdc_and_sf__samples_blended__read bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define cflgt2__hdc_and_sf__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define cflgt2__hdc_and_sf__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define cflgt2__hdc_and_sf__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define cflgt2__hdc_and_sf__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define cflgt2__hdc_and_sf__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define cflgt2__hdc_and_sf__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define cflgt2__hdc_and_sf__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define cflgt2__hdc_and_sf__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define cflgt2__hdc_and_sf__poly_data_ready__read bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define cflgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define cflgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss2) */
#define cflgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define cflgt2__hdc_and_sf__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt2 aliases for "Metric set L3_1".
 *
 * Each macro only renames an accessor that carries another prefix (hsw__ or
 * bdw__); nothing is computed here.
 *
 * NOTE(review): the Slice0 L3 bank stalled/active aliases point at bdw
 * render_basic sampler0_busy and bdw render_pipe_profile *_bottleneck
 * accessors, whose names describe other counters. Presumably the generator
 * reuses an existing accessor whenever it is equivalent -- confirm against
 * gen_perf.py before changing any target by hand.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define cflgt2__l3_1__gpu_time__read hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define cflgt2__l3_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__read alias) */
#define cflgt2__l3_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max alias) */
#define cflgt2__l3_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define cflgt2__l3_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define cflgt2__l3_1__vs_threads__read bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define cflgt2__l3_1__hs_threads__read bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define cflgt2__l3_1__ds_threads__read bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define cflgt2__l3_1__gs_threads__read hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define cflgt2__l3_1__ps_threads__read bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define cflgt2__l3_1__cs_threads__read bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define cflgt2__l3_1__eu_active__read bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define cflgt2__l3_1__eu_stall__read bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define cflgt2__l3_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define cflgt2__l3_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define cflgt2__l3_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define cflgt2__l3_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define cflgt2__l3_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define cflgt2__l3_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define cflgt2__l3_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define cflgt2__l3_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define cflgt2__l3_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define cflgt2__l3_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define cflgt2__l3_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define cflgt2__l3_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define cflgt2__l3_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define cflgt2__l3_1__samples_written__read bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define cflgt2__l3_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define cflgt2__l3_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define cflgt2__l3_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define cflgt2__l3_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define cflgt2__l3_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define cflgt2__l3_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define cflgt2__l3_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define cflgt2__l3_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define cflgt2__l3_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define cflgt2__l3_1__l30_bank0_stalled__read bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define cflgt2__l3_1__l30_bank1_stalled__read bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define cflgt2__l3_1__l30_bank1_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define cflgt2__l3_1__l30_bank0_active__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define cflgt2__l3_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt2 aliases for "Metric set L3_2" (the list continues after this run).
 *
 * Each macro only renames an accessor that carries another prefix (hsw__ or
 * bdw__); nothing is computed here.
 *
 * NOTE(review): gs_threads points at the hsw render_basic vs_threads
 * accessor. Presumably the generator reuses an existing accessor whenever it
 * is equivalent -- confirm against gen_perf.py before changing any target by
 * hand.
 */

/* Metric set L3_2 :: GPU Time Elapsed */
#define cflgt2__l3_2__gpu_time__read hsw__render_basic__gpu_time__read

/* Metric set L3_2 :: GPU Core Clocks */
#define cflgt2__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__read alias) */
#define cflgt2__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_2 :: AVG GPU Core Frequency (__max alias) */
#define cflgt2__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_2 :: GPU Busy */
#define cflgt2__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Metric set L3_2 :: VS Threads Dispatched */
#define cflgt2__l3_2__vs_threads__read bdw__render_basic__vs_threads__read

/* Metric set L3_2 :: HS Threads Dispatched */
#define cflgt2__l3_2__hs_threads__read bdw__render_basic__hs_threads__read

/* Metric set L3_2 :: DS Threads Dispatched */
#define cflgt2__l3_2__ds_threads__read bdw__render_basic__ds_threads__read

/* Metric set L3_2 :: GS Threads Dispatched */
#define cflgt2__l3_2__gs_threads__read hsw__render_basic__vs_threads__read

/* Metric set L3_2 :: FS Threads Dispatched */
#define cflgt2__l3_2__ps_threads__read bdw__render_basic__ps_threads__read

/* Metric set L3_2 :: CS Threads Dispatched */
#define cflgt2__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* Metric set L3_2 :: EU Active */
#define cflgt2__l3_2__eu_active__read bdw__render_basic__eu_active__read

/* Metric set L3_2 :: EU Stall */
#define cflgt2__l3_2__eu_stall__read bdw__render_basic__eu_stall__read

/* Metric set L3_2 :: EU Both FPU Pipes Active */
#define cflgt2__l3_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_2 :: VS FPU0 Pipe Active */
#define cflgt2__l3_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_2 :: VS FPU1 Pipe Active */
#define cflgt2__l3_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_2 :: VS Send Pipe Active */
#define cflgt2__l3_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Metric set L3_2 :: PS FPU0 Pipe Active */
#define cflgt2__l3_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_2 :: PS FPU1 Pipe Active */
#define cflgt2__l3_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_2 :: PS Send Pipeline Active */
#define cflgt2__l3_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Metric set L3_2 :: FS Both FPU Active */
#define cflgt2__l3_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_2 :: Rasterized Pixels */
#define cflgt2__l3_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Metric set L3_2 :: Early Hi-Depth Test Fails */
#define cflgt2__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_2 :: Early Depth Test Fails */
#define cflgt2__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Samples Killed in FS */
#define cflgt2__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_2 :: Pixels Failing Tests */
#define cflgt2__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_2 :: Samples Written */
#define cflgt2__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_2 :: Samples Blended */
#define cflgt2__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_2 :: Sampler Texels */
#define cflgt2__l3_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_2 :: Sampler Texels Misses */
#define cflgt2__l3_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_2 :: SLM Bytes Read */
#define cflgt2__l3_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_2 :: SLM Bytes Written */
#define cflgt2__l3_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_2 :: Shader Memory Accesses */
#define cflgt2__l3_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_2 :: Shader Atomic Memory Accesses */
#define cflgt2__l3_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_2 :: L3 Shader Throughput */
#define cflgt2__l3_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_2 :: Shader Barrier Messages */
#define cflgt2__l3_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Stalled */
#define cflgt2__l3_2__l30_bank2_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_2 :: Slice0 L3 Bank2 Active */
#define cflgt2__l3_2__l30_bank2_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_2 :: SQ is full */
#define cflgt2__l3_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set L3_3 -- counter accessor aliases.
 *
 * Every cflgt2__l3_3__* name defined in this group is a plain preprocessor
 * alias for an accessor named elsewhere; nothing here emits code of its own.
 * NOTE(review): a few aliases resolve to an accessor named after a different
 * counter (for example gs_threads -> hsw__render_basic__vs_threads__read).
 * Presumably the generator reuses accessors with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define cflgt2__l3_3__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read accessor) */
#define cflgt2__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max accessor) */
#define cflgt2__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__l3_3__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__l3_3__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__l3_3__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__l3_3__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__l3_3__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__l3_3__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__l3_3__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__l3_3__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__l3_3__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__l3_3__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__l3_3__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__l3_3__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__l3_3__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__l3_3__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__l3_3__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt2__l3_3__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__l3_3__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__l3_3__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__l3_3__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__l3_3__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__l3_3__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__l3_3__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__l3_3__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__l3_3__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__l3_3__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__l3_3__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__l3_3__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__l3_3__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__l3_3__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__l3_3__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 L3 Bank3 Stalled */
#define cflgt2__l3_3__l30_bank3_stalled__read bdw__render_basic__sampler0_busy__read

/* Slice0 L3 Bank3 Active */
#define cflgt2__l3_3__l30_bank3_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* SQ is full */
#define cflgt2__l3_3__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set RasterizerAndPixelBackend -- counter accessor aliases.
 *
 * Every cflgt2__rasterizer_and_pixel_backend__* name defined in this group is
 * a plain preprocessor alias for an accessor named elsewhere; nothing here
 * emits code of its own.
 * NOTE(review): a few aliases resolve to an accessor named after a different
 * counter (for example gs_threads -> hsw__render_basic__vs_threads__read).
 * Presumably the generator reuses accessors with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define cflgt2__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read accessor) */
#define cflgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max accessor) */
#define cflgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt2__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Post-EarlyZ Pixel Data Ready */
#define cflgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_busy__read

/* Slice0 Rasterizer Input Available */
#define cflgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_basic__sampler1_busy__read

/* Slice0 PS Output Available */
#define cflgt2__rasterizer_and_pixel_backend__ps_output0_available__read bdw__render_pipe_profile__bc_bottleneck__read

/* Slice0 Pixel Values Ready */
#define cflgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Slice0 Rasterizer Output Ready */
#define cflgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* SQ is full */
#define cflgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set Sampler -- counter accessor aliases.
 *
 * Every cflgt2__sampler__* name defined in this group is a plain preprocessor
 * alias for an accessor named elsewhere; nothing here emits code of its own.
 * NOTE(review): a few aliases resolve to an accessor named after a different
 * counter (for example gs_threads -> hsw__render_basic__vs_threads__read).
 * Presumably the generator reuses accessors with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define cflgt2__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read accessor) */
#define cflgt2__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max accessor) */
#define cflgt2__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__sampler__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt2__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__sampler__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Subslice1 Input Available */
#define cflgt2__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Slice0 Subslice2 Input Available */
#define cflgt2__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Slice0 Subslice0 Input Available */
#define cflgt2__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 Subslice2 Sampler Output Ready */
#define cflgt2__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Slice0 Subslice0 Sampler Output Ready */
#define cflgt2__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Slice0 Subslice1 Sampler Output Ready */
#define cflgt2__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* SQ is full */
#define cflgt2__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set TDL_1 -- counter accessor aliases.
 *
 * Every cflgt2__tdl_1__* name defined in this group is a plain preprocessor
 * alias for an accessor named elsewhere; nothing here emits code of its own.
 * NOTE(review): a few aliases resolve to an accessor named after a different
 * counter (for example gs_threads -> hsw__render_basic__vs_threads__read).
 * Presumably the generator reuses accessors with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define cflgt2__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read accessor) */
#define cflgt2__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max accessor) */
#define cflgt2__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt2__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt2__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt2__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt2__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt2__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt2__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt2__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt2__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define cflgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define cflgt2__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define cflgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define cflgt2__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define cflgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define cflgt2__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* SQ is full */
#define cflgt2__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * Metric set TDL_2 -- counter accessor aliases.
 *
 * Every cflgt2__tdl_2__* name defined in this group is a plain preprocessor
 * alias for an accessor named elsewhere; nothing here emits code of its own.
 * NOTE(review): a few aliases resolve to an accessor named after a different
 * counter (for example gs_threads -> hsw__render_basic__vs_threads__read).
 * Presumably the generator reuses accessors with identical equations --
 * confirm against gen_perf.py before changing any target.
 */

/* GPU Time Elapsed */
#define cflgt2__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read accessor) */
#define cflgt2__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max accessor) */
#define cflgt2__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt2__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt2__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt2__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt2__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt2__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt2__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt2__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt2__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt2__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt2__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt2__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt2__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt2__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define cflgt2__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define cflgt2__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define cflgt2__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define cflgt2__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define cflgt2__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define cflgt2__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define cflgt2__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define cflgt2__tdl_2__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define cflgt2__tdl_2__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define cflgt2__tdl_2__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define cflgt2__tdl_2__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define cflgt2__tdl_2__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define cflgt2__tdl_2__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_2 :: SQ is full */
#define cflgt2__tdl_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt2 :: "Compute Metrics Extra set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.
 * NOTE(review): fpu1_active resolves to an accessor named eu_stall --
 * presumably the generator reuses accessors with identical equations;
 * confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define cflgt2__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EU FPU1 Pipe Active */
#define cflgt2__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* EU FPU1 Pipe Active including Ext Math */
#define cflgt2__compute_extra__fpu1_active_adjusted__read sklgt2__compute_extra__fpu1_active_adjusted__read

/*
 * cflgt2 :: "Media Vme Pipe metrics set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read

/* CS Threads Dispatched */
#define cflgt2__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt2__vme_pipe__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt2__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU Thread Occupancy */
#define cflgt2__vme_pipe__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VME Busy */
#define cflgt2__vme_pipe__vme_busy__read bdw__vme_pipe__vme_busy__read

/*
 * cflgt2 :: "Gpu Rings Busyness metrics set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  The ring-busy counters resolve to accessors
 * named after render_pipe_profile / render_basic counters.
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define cflgt2__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Render Ring Busy */
#define cflgt2__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Vdbox0 Ring Busy */
#define cflgt2__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Vebox Ring Busy */
#define cflgt2__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__sf_stall__read

/* Blitter Ring Busy */
#define cflgt2__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read

/* AnyRingBusy */
#define cflgt2__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/*
 * cflgt2 :: "MDAPI testing set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  TestCounter0..8 resolve to accessors named
 * after hsw compute_extended / memory_reads counters.
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define cflgt2__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TestCounter0 */
#define cflgt2__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* TestCounter1 */
#define cflgt2__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* TestCounter2 */
#define cflgt2__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* TestCounter3 */
#define cflgt2__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* TestCounter4 */
#define cflgt2__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* TestCounter5 */
#define cflgt2__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* TestCounter6 */
#define cflgt2__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* TestCounter7 */
#define cflgt2__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* TestCounter8 */
#define cflgt2__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/*
 * cflgt2 :: "Metric set PMA Stall"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.
 */

/* GPU Time Elapsed */
#define cflgt2__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* STC PMA stall */
#define cflgt2__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/*
 * cflgt2 :: "AsyncCompute metrics set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  Most of the FPU0/FPU1 aliases resolve to an
 * accessor named after a different counter (for example
 * ps_fpu0_active -> bdw__render_basic__vs_fpu1_active__read).
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py before "correcting" a target.
 */

/* GPU Time Elapsed */
#define cflgt2__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt2__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt2__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt2__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt2__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt2__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt2__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt2__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt2__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt2__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt2__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU0 Pipe Active */
#define cflgt2__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt2__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* PS FPU0 Pipe Active */
#define cflgt2__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* CS FPU0 Pipe Active */
#define cflgt2__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt2__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt2__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* PS FPU1 Pipe Active */
#define cflgt2__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* CS FPU1 Pipe Active */
#define cflgt2__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* EU Thread Occupancy */
#define cflgt2__async_compute__eu_thread_occupancy__read sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* EU Active */
#define cflgt2__async_compute__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt2__async_compute__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * cflgt3 :: "Render Metrics Basic set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  A few aliases resolve to an accessor named
 * after a different counter (for example
 * l3_misses -> hsw__compute_extended__typed_atomics0__read).
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py before "correcting" a target.
 */

/* GPU Time Elapsed */
#define cflgt3__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt3__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt3__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched */
#define cflgt3__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy */
#define cflgt3__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define cflgt3__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt3__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt3__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt3__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt3__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt3__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt3__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt3__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler 0 Busy */
#define cflgt3__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler 1 Busy */
#define cflgt3__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy */
#define cflgt3__render_basic__samplers_busy__read bdw__render_basic__samplers_busy__read

/* Sampler 0 Bottleneck */
#define cflgt3__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler 1 Bottleneck */
#define cflgt3__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* Rasterized Pixels */
#define cflgt3__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler Cache Misses */
#define cflgt3__render_basic__sampler_l1_misses__read bdw__render_basic__sampler_l1_misses__read

/* SLM Bytes Read */
#define cflgt3__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Lookup Accesses w/o IC */
#define cflgt3__render_basic__l3_lookups__read bdw__render_basic__l3_lookups__read

/* L3 Misses */
#define cflgt3__render_basic__l3_misses__read hsw__compute_extended__typed_atomics0__read

/* L3 Sampler Throughput */
#define cflgt3__render_basic__l3_sampler_throughput__read bdw__render_basic__l3_sampler_throughput__read

/* L3 Shader Throughput */
#define cflgt3__render_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Fixed Pipe Throughput */
#define cflgt3__render_basic__gti_vf_throughput__read bdw__render_basic__gti_vf_throughput__read

/* GTI Depth Throughput */
#define cflgt3__render_basic__gti_depth_throughput__read bdw__render_basic__gti_depth_throughput__read

/* GTI RCC Throughput */
#define cflgt3__render_basic__gti_rcc_throughput__read bdw__render_basic__gti_rcc_throughput__read

/* GTI L3 Throughput */
#define cflgt3__render_basic__gti_l3_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI HDC TLB Lookup Throughput */
#define cflgt3__render_basic__gti_hdc_lookups_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/* GTI Read Throughput */
#define cflgt3__render_basic__gti_read_throughput__read bdw__render_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define cflgt3__render_basic__gti_write_throughput__read hsw__render_basic__gti_write_throughput__read

/* Samplers Bottleneck */
#define cflgt3__render_basic__sampler_bottleneck__read bdw__render_basic__sampler_bottleneck__read

/*
 * cflgt3 :: "Compute Metrics Basic set"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  A few aliases resolve to an accessor named
 * after a different counter (for example
 * gti_write_throughput -> bdw__render_basic__gti_hdc_lookups_throughput__read).
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py before "correcting" a target.
 */

/* GPU Time Elapsed */
#define cflgt3__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt3__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt3__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define cflgt3__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt3__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define cflgt3__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define cflgt3__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define cflgt3__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define cflgt3__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Typed Bytes Read */
#define cflgt3__compute_basic__typed_bytes_read__read bdw__compute_basic__typed_bytes_read__read

/* Typed Bytes Written */
#define cflgt3__compute_basic__typed_bytes_written__read bdw__compute_basic__typed_bytes_written__read

/* Untyped Bytes Read */
#define cflgt3__compute_basic__untyped_bytes_read__read bdw__compute_basic__untyped_bytes_read__read

/* Untyped Writes */
#define cflgt3__compute_basic__untyped_bytes_written__read bdw__compute_basic__untyped_bytes_written__read

/* GTI Read Throughput */
#define cflgt3__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI Write Throughput */
#define cflgt3__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * cflgt3 :: "Render Metrics set for 3D Pipeline Profile"
 *
 * Preprocessor aliases only; each name resolves to an accessor that
 * carries another prefix.  A few aliases resolve to an accessor named
 * after a different counter (for example
 * vs_bottleneck -> bdw__render_basic__sampler0_busy__read).
 * NOTE(review): presumably the generator reuses accessors with identical
 * equations -- confirm against gen_perf.py before "correcting" a target.
 */

/* GPU Time Elapsed */
#define cflgt3__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define cflgt3__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define cflgt3__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define cflgt3__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__render_pipe_profile__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck */
#define cflgt3__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define cflgt3__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define cflgt3__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define cflgt3__render_pipe_profile__ds_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define cflgt3__render_pipe_profile__gs_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define cflgt3__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define cflgt3__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define cflgt3__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define cflgt3__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define cflgt3__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define cflgt3__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define cflgt3__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define cflgt3__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define cflgt3__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define cflgt3__render_pipe_profile__cl_stall__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define cflgt3__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_stall__read

/*
 * cflgt3 aliases -- "Memory Reads Distribution metrics set".
 *
 * Every macro in this run makes a cflgt3__memory_reads__* name expand to a
 * reader identifier that carries another prefix (hsw__ / bdw__). The short
 * comment above each macro is the counter's display name.
 *
 * NOTE(review): many targets are named after a different counter than the
 * alias (e.g. the Gti* counters point at hsw__compute_extended__* readers).
 * Presumably the generator reuses an earlier reader with an identical body --
 * confirm against the generator before touching any target by hand.
 */

/* GPU Time Elapsed */
#define cflgt3__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (the __read entry) */
#define cflgt3__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (the __max entry) */
#define cflgt3__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define cflgt3__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define cflgt3__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryReads */
#define cflgt3__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define cflgt3__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define cflgt3__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define cflgt3__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define cflgt3__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define cflgt3__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define cflgt3__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define cflgt3__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define cflgt3__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define cflgt3__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define cflgt3__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define cflgt3__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define cflgt3__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define cflgt3__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define cflgt3__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * cflgt3 aliases -- "Memory Writes Distribution metrics set".
 *
 * Every macro in this run makes a cflgt3__memory_writes__* name expand to a
 * reader identifier that carries another prefix (hsw__ / bdw__). The short
 * comment above each macro is the counter's display name.
 *
 * NOTE(review): many targets are named after a different counter than the
 * alias (several write counters point at *_reads / memory_reads readers).
 * Presumably the generator reuses an earlier reader with an identical body --
 * confirm against the generator before touching any target by hand.
 */

/* GPU Time Elapsed */
#define cflgt3__memory_writes__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__memory_writes__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (the __read entry) */
#define cflgt3__memory_writes__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (the __max entry) */
#define cflgt3__memory_writes__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__memory_writes__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__memory_writes__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__memory_writes__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__memory_writes__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__memory_writes__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__memory_writes__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__memory_writes__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__memory_writes__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__memory_writes__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define cflgt3__memory_writes__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__memory_writes__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__memory_writes__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__memory_writes__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__memory_writes__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__memory_writes__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__memory_writes__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__memory_writes__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__memory_writes__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__memory_writes__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__memory_writes__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__memory_writes__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__memory_writes__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__memory_writes__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__memory_writes__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryWrites */
#define cflgt3__memory_writes__gti_cmd_streamer_memory_writes__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiSoMemoryWrites */
#define cflgt3__memory_writes__gti_so_memory_writes__read hsw__compute_extended__eu_typed_reads0__read

/* GtiRccMemoryWrites */
#define cflgt3__memory_writes__gti_rcc_memory_writes__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryWrites */
#define cflgt3__memory_writes__gti_msc_memory_writes__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryWrites */
#define cflgt3__memory_writes__gti_hiz_memory_writes__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryWrites */
#define cflgt3__memory_writes__gti_stc_memory_writes__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryWrites */
#define cflgt3__memory_writes__gti_rcz_memory_writes__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryWrites */
#define cflgt3__memory_writes__gti_memory_writes__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Writes */
#define cflgt3__memory_writes__gti_l3_bank0_writes__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Writes */
#define cflgt3__memory_writes__gti_l3_bank1_writes__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Writes */
#define cflgt3__memory_writes__gti_l3_bank2_writes__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Writes */
#define cflgt3__memory_writes__gti_l3_bank3_writes__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Writes */
#define cflgt3__memory_writes__gti_l3_writes__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define cflgt3__memory_writes__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * cflgt3 aliases -- "Compute Metrics Extended metrics set".
 *
 * Every macro in this run makes a cflgt3__compute_extended__* name expand to
 * a reader identifier that carries another prefix (hsw__ / bdw__). The short
 * comment above each macro is the counter's display name.
 *
 * NOTE(review): some targets are named after a different counter than the
 * alias (e.g. typed_reads0, eu_a64_untyped_reads0/writes0, shader_barriers).
 * Presumably the generator reuses an earlier reader with an identical body --
 * confirm against the generator before touching any target by hand.
 */

/* GPU Time Elapsed */
#define cflgt3__compute_extended__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__compute_extended__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (the __read entry) */
#define cflgt3__compute_extended__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (the __max entry) */
#define cflgt3__compute_extended__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* CS Threads Dispatched */
#define cflgt3__compute_extended__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__compute_extended__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__compute_extended__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__compute_extended__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define cflgt3__compute_extended__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt3__compute_extended__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define cflgt3__compute_extended__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define cflgt3__compute_extended__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define cflgt3__compute_extended__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Sampler Texels */
#define cflgt3__compute_extended__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__compute_extended__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__compute_extended__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__compute_extended__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__compute_extended__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__compute_extended__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__compute_extended__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__compute_extended__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* EuUntypedReads0 */
#define cflgt3__compute_extended__eu_untyped_reads0__read hsw__compute_extended__eu_untyped_reads0__read

/* EuTypedReads0 */
#define cflgt3__compute_extended__eu_typed_reads0__read hsw__compute_extended__eu_typed_reads0__read

/* EuUntypedWrites0 */
#define cflgt3__compute_extended__eu_untyped_writes0__read hsw__compute_extended__eu_untyped_writes0__read

/* EuTypedWrites0 */
#define cflgt3__compute_extended__eu_typed_writes0__read hsw__compute_extended__eu_typed_writes0__read

/* EuUntypedAtomics0 */
#define cflgt3__compute_extended__eu_untyped_atomics0__read hsw__compute_extended__eu_untyped_atomics0__read

/* EuTypedAtomics0 */
#define cflgt3__compute_extended__eu_typed_atomics0__read hsw__compute_extended__eu_typed_atomics0__read

/* EuA64UntypedReads0 */
#define cflgt3__compute_extended__eu_a64_untyped_reads0__read hsw__compute_extended__eu_urb_atomics0__read

/* EuA64UntypedWrites0 */
#define cflgt3__compute_extended__eu_a64_untyped_writes0__read hsw__compute_extended__gpu_clocks__read

/* Typed Reads 0 */
#define cflgt3__compute_extended__typed_reads0__read hsw__render_basic__gpu_core_clocks__read

/* Typed Writes 0 */
#define cflgt3__compute_extended__typed_writes0__read hsw__compute_extended__typed_writes0__read

/* Untyped Reads 0 */
#define cflgt3__compute_extended__untyped_reads0__read hsw__compute_extended__untyped_reads0__read

/* Untyped Writes 0 */
#define cflgt3__compute_extended__untyped_writes0__read hsw__compute_extended__untyped_writes0__read

/* Typed Atomics 0 */
#define cflgt3__compute_extended__typed_atomics0__read hsw__compute_extended__typed_atomics0__read

/* TypedReadsPerCacheLine */
#define cflgt3__compute_extended__typed_reads_per_cache_line__read hsw__compute_extended__typed_reads_per_cache_line__read

/* TypedWritesPerCacheLine */
#define cflgt3__compute_extended__typed_writes_per_cache_line__read hsw__compute_extended__typed_writes_per_cache_line__read

/* UntypedReadsPerCacheLine */
#define cflgt3__compute_extended__untyped_reads_per_cache_line__read bdw__compute_extended__untyped_reads_per_cache_line__read

/* UntypedWritesPerCacheLine */
#define cflgt3__compute_extended__untyped_writes_per_cache_line__read bdw__compute_extended__untyped_writes_per_cache_line__read

/* TypedAtomicsPerCacheLine */
#define cflgt3__compute_extended__typed_atomics_per_cache_line__read hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * cflgt3 aliases -- "Compute Metrics L3 Cache metrics set".
 *
 * Every macro in this run makes a cflgt3__compute_l3_cache__* name expand to
 * a reader identifier that carries another prefix (hsw__ / bdw__ / chv__ /
 * sklgt2__). The short comment above each macro is the counter's display
 * name.
 *
 * NOTE(review): some targets are named after a different counter than the
 * alias (e.g. the ternary/binary FPU instruction counters point at
 * bdw__render_basic__ps_* readers). Presumably the generator reuses an
 * earlier reader with an identical body -- confirm against the generator
 * before touching any target by hand.
 */

/* GPU Time Elapsed */
#define cflgt3__compute_l3_cache__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__compute_l3_cache__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (the __read entry) */
#define cflgt3__compute_l3_cache__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (the __max entry) */
#define cflgt3__compute_l3_cache__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__compute_l3_cache__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__compute_l3_cache__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__compute_l3_cache__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__compute_l3_cache__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__compute_l3_cache__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__compute_l3_cache__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__compute_l3_cache__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__compute_l3_cache__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__compute_l3_cache__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__compute_l3_cache__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define cflgt3__compute_l3_cache__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt3__compute_l3_cache__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define cflgt3__compute_l3_cache__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define cflgt3__compute_l3_cache__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU FPU0 Hybrid Instruction */
#define cflgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* EU FPU1 Hybrid Instruction */
#define cflgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* EU FPU0 Ternary Instruction */
#define cflgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read bdw__render_basic__ps_fpu0_active__read

/* EU FPU1 Ternary Instruction */
#define cflgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read bdw__render_basic__ps_fpu1_active__read

/* EU FPU0 Binary Instruction */
#define cflgt3__compute_l3_cache__eu_binary_fpu0_instruction__read bdw__render_basic__ps_send_active__read

/* EU FPU1 Binary Instruction */
#define cflgt3__compute_l3_cache__eu_binary_fpu1_instruction__read bdw__render_basic__ps_eu_both_fpu_active__read

/* EU FPU0 Move Instruction */
#define cflgt3__compute_l3_cache__eu_move_fpu0_instruction__read bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* EU FPU1 Move Instruction */
#define cflgt3__compute_l3_cache__eu_move_fpu1_instruction__read bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Rasterized Pixels */
#define cflgt3__compute_l3_cache__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__compute_l3_cache__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__compute_l3_cache__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__compute_l3_cache__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__compute_l3_cache__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__compute_l3_cache__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__compute_l3_cache__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Accesses */
#define cflgt3__compute_l3_cache__sampler_accesses__read bdw__render_pipe_profile__sampler_accesses__read

/* Sampler Texels */
#define cflgt3__compute_l3_cache__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__compute_l3_cache__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__compute_l3_cache__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__compute_l3_cache__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__compute_l3_cache__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__compute_l3_cache__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Accesses */
#define cflgt3__compute_l3_cache__l3_accesses__read sklgt2__compute_l3_cache__l3_accesses__read

/* L3 Misses */
#define cflgt3__compute_l3_cache__l3_misses__read bdw__compute_l3_cache__l3_misses__read

/* L3 Sampler Throughput */
#define cflgt3__compute_l3_cache__l3_sampler_throughput__read chv__render_basic__l3_sampler_throughput__read

/* L3 Shader Throughput */
#define cflgt3__compute_l3_cache__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* L3 Total Throughput */
#define cflgt3__compute_l3_cache__l3_total_throughput__read sklgt2__compute_l3_cache__l3_total_throughput__read

/* Shader Barrier Messages */
#define cflgt3__compute_l3_cache__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* L3 Bank 00 Accesses */
#define cflgt3__compute_l3_cache__l3_bank00_accesses__read bdw__compute_l3_cache__l3_bank00_accesses__read

/* L3 Bank 01 Accesses */
#define cflgt3__compute_l3_cache__l3_bank01_accesses__read bdw__compute_l3_cache__l3_bank01_accesses__read

/* L3 Bank 02 Accesses */
#define cflgt3__compute_l3_cache__l3_bank02_accesses__read bdw__compute_l3_cache__l3_bank02_accesses__read

/* L3 Bank 03 Accesses */
#define cflgt3__compute_l3_cache__l3_bank03_accesses__read bdw__compute_l3_cache__l3_bank03_accesses__read

/* L3 Bank 00 IC Accesses */
#define cflgt3__compute_l3_cache__l3_bank00_ic_accesses__read bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Bank 00 IC Hits */
#define cflgt3__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache metrics set :: GTI L3 Throughput */
#define cflgt3__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache metrics set :: GTI Read Throughput */
#define cflgt3__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache metrics set :: GTI Write Throughput */
#define cflgt3__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * cflgt3 aliases for the "HDCAndSF" metric set.
 *
 * Each macro in this span is a pure rename: the cflgt3-prefixed identifier
 * expands to an identifier carrying another platform prefix. No function is
 * defined here.
 * NOTE(review): some targets carry a different counter name than the alias
 * (e.g. gs_threads -> vs_threads, poly_data_ready -> sampler0_busy);
 * presumably gen_perf.py shares one reader between counters with identical
 * equations -- confirm against the generator before changing any by hand.
 */

/* HDCAndSF | GPU Time Elapsed (read) */
#define cflgt3__hdc_and_sf__gpu_time__read hsw__render_basic__gpu_time__read

/* HDCAndSF | GPU Core Clocks (read) */
#define cflgt3__hdc_and_sf__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* HDCAndSF | AVG GPU Core Frequency (read) */
#define cflgt3__hdc_and_sf__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* HDCAndSF | AVG GPU Core Frequency (max) */
#define cflgt3__hdc_and_sf__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* HDCAndSF | GPU Busy (read) */
#define cflgt3__hdc_and_sf__gpu_busy__read bdw__render_basic__gpu_busy__read

/* HDCAndSF | VS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__vs_threads__read bdw__render_basic__vs_threads__read

/* HDCAndSF | HS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__hs_threads__read bdw__render_basic__hs_threads__read

/* HDCAndSF | DS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__ds_threads__read bdw__render_basic__ds_threads__read

/* HDCAndSF | GS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__gs_threads__read hsw__render_basic__vs_threads__read

/* HDCAndSF | FS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__ps_threads__read bdw__render_basic__ps_threads__read

/* HDCAndSF | CS Threads Dispatched (read) */
#define cflgt3__hdc_and_sf__cs_threads__read bdw__render_basic__cs_threads__read

/* HDCAndSF | EU Active (read) */
#define cflgt3__hdc_and_sf__eu_active__read bdw__render_basic__eu_active__read

/* HDCAndSF | EU Stall (read) */
#define cflgt3__hdc_and_sf__eu_stall__read bdw__render_basic__eu_stall__read

/* HDCAndSF | EU Both FPU Pipes Active (read) */
#define cflgt3__hdc_and_sf__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* HDCAndSF | VS FPU0 Pipe Active (read) */
#define cflgt3__hdc_and_sf__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* HDCAndSF | VS FPU1 Pipe Active (read) */
#define cflgt3__hdc_and_sf__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* HDCAndSF | VS Send Pipe Active (read) */
#define cflgt3__hdc_and_sf__vs_send_active__read bdw__render_basic__vs_send_active__read

/* HDCAndSF | PS FPU0 Pipe Active (read) */
#define cflgt3__hdc_and_sf__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* HDCAndSF | PS FPU1 Pipe Active (read) */
#define cflgt3__hdc_and_sf__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* HDCAndSF | PS Send Pipeline Active (read) */
#define cflgt3__hdc_and_sf__ps_send_active__read bdw__render_basic__ps_send_active__read

/* HDCAndSF | FS Both FPU Active (read) */
#define cflgt3__hdc_and_sf__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* HDCAndSF | Rasterized Pixels (read) */
#define cflgt3__hdc_and_sf__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* HDCAndSF | Early Hi-Depth Test Fails (read) */
#define cflgt3__hdc_and_sf__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* HDCAndSF | Early Depth Test Fails (read) */
#define cflgt3__hdc_and_sf__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* HDCAndSF | Samples Killed in FS (read) */
#define cflgt3__hdc_and_sf__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* HDCAndSF | Pixels Failing Tests (read) */
#define cflgt3__hdc_and_sf__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* HDCAndSF | Samples Written (read) */
#define cflgt3__hdc_and_sf__samples_written__read bdw__render_basic__samples_written__read

/* HDCAndSF | Samples Blended (read) */
#define cflgt3__hdc_and_sf__samples_blended__read bdw__render_basic__samples_blended__read

/* HDCAndSF | Sampler Texels (read) */
#define cflgt3__hdc_and_sf__sampler_texels__read bdw__render_basic__sampler_texels__read

/* HDCAndSF | Sampler Texels Misses (read) */
#define cflgt3__hdc_and_sf__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* HDCAndSF | SLM Bytes Read (read) */
#define cflgt3__hdc_and_sf__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* HDCAndSF | SLM Bytes Written (read) */
#define cflgt3__hdc_and_sf__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* HDCAndSF | Shader Memory Accesses (read) */
#define cflgt3__hdc_and_sf__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* HDCAndSF | Shader Atomic Memory Accesses (read) */
#define cflgt3__hdc_and_sf__shader_atomics__read bdw__render_basic__shader_atomics__read

/* HDCAndSF | L3 Shader Throughput (read) */
#define cflgt3__hdc_and_sf__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* HDCAndSF | Shader Barrier Messages (read) */
#define cflgt3__hdc_and_sf__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* HDCAndSF | Polygon Data Ready (read) */
#define cflgt3__hdc_and_sf__poly_data_ready__read bdw__render_basic__sampler0_busy__read

/*
 * NOTE(review): the ss0 and ss2 aliases below resolve to the bdw shader02 and
 * shader00 readers respectively (crossed), while ss1 maps straight across.
 * Kept exactly as generated; verify against the generator input before
 * treating it as a mistake.
 */

/* HDCAndSF | HDC stalled by L3 (s0.ss0) (read) */
#define cflgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* HDCAndSF | HDC stalled by L3 (s0.ss1) (read) */
#define cflgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* HDCAndSF | HDC stalled by L3 (s0.ss2) (read) */
#define cflgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* HDCAndSF | SQ is full (read) */
#define cflgt3__hdc_and_sf__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 aliases for the "L3_1" metric set.
 *
 * Each macro in this span is a pure rename: the cflgt3-prefixed identifier
 * expands to an identifier carrying another platform prefix. No function is
 * defined here.
 * NOTE(review): some targets carry a different counter name than the alias
 * (e.g. l30_bank1_stalled -> so_bottleneck); presumably gen_perf.py shares
 * one reader between counters with identical equations -- confirm against
 * the generator before changing any of these by hand.
 */

/* L3_1 | GPU Time Elapsed (read) */
#define cflgt3__l3_1__gpu_time__read hsw__render_basic__gpu_time__read

/* L3_1 | GPU Core Clocks (read) */
#define cflgt3__l3_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* L3_1 | AVG GPU Core Frequency (read) */
#define cflgt3__l3_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* L3_1 | AVG GPU Core Frequency (max) */
#define cflgt3__l3_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* L3_1 | GPU Busy (read) */
#define cflgt3__l3_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* L3_1 | VS Threads Dispatched (read) */
#define cflgt3__l3_1__vs_threads__read bdw__render_basic__vs_threads__read

/* L3_1 | HS Threads Dispatched (read) */
#define cflgt3__l3_1__hs_threads__read bdw__render_basic__hs_threads__read

/* L3_1 | DS Threads Dispatched (read) */
#define cflgt3__l3_1__ds_threads__read bdw__render_basic__ds_threads__read

/* L3_1 | GS Threads Dispatched (read) */
#define cflgt3__l3_1__gs_threads__read hsw__render_basic__vs_threads__read

/* L3_1 | FS Threads Dispatched (read) */
#define cflgt3__l3_1__ps_threads__read bdw__render_basic__ps_threads__read

/* L3_1 | CS Threads Dispatched (read) */
#define cflgt3__l3_1__cs_threads__read bdw__render_basic__cs_threads__read

/* L3_1 | EU Active (read) */
#define cflgt3__l3_1__eu_active__read bdw__render_basic__eu_active__read

/* L3_1 | EU Stall (read) */
#define cflgt3__l3_1__eu_stall__read bdw__render_basic__eu_stall__read

/* L3_1 | EU Both FPU Pipes Active (read) */
#define cflgt3__l3_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* L3_1 | VS FPU0 Pipe Active (read) */
#define cflgt3__l3_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* L3_1 | VS FPU1 Pipe Active (read) */
#define cflgt3__l3_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* L3_1 | VS Send Pipe Active (read) */
#define cflgt3__l3_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* L3_1 | PS FPU0 Pipe Active (read) */
#define cflgt3__l3_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* L3_1 | PS FPU1 Pipe Active (read) */
#define cflgt3__l3_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* L3_1 | PS Send Pipeline Active (read) */
#define cflgt3__l3_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* L3_1 | FS Both FPU Active (read) */
#define cflgt3__l3_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* L3_1 | Rasterized Pixels (read) */
#define cflgt3__l3_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* L3_1 | Early Hi-Depth Test Fails (read) */
#define cflgt3__l3_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* L3_1 | Early Depth Test Fails (read) */
#define cflgt3__l3_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* L3_1 | Samples Killed in FS (read) */
#define cflgt3__l3_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* L3_1 | Pixels Failing Tests (read) */
#define cflgt3__l3_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* L3_1 | Samples Written (read) */
#define cflgt3__l3_1__samples_written__read bdw__render_basic__samples_written__read

/* L3_1 | Samples Blended (read) */
#define cflgt3__l3_1__samples_blended__read bdw__render_basic__samples_blended__read

/* L3_1 | Sampler Texels (read) */
#define cflgt3__l3_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* L3_1 | Sampler Texels Misses (read) */
#define cflgt3__l3_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* L3_1 | SLM Bytes Read (read) */
#define cflgt3__l3_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* L3_1 | SLM Bytes Written (read) */
#define cflgt3__l3_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* L3_1 | Shader Memory Accesses (read) */
#define cflgt3__l3_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* L3_1 | Shader Atomic Memory Accesses (read) */
#define cflgt3__l3_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3_1 | L3 Shader Throughput (read) */
#define cflgt3__l3_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* L3_1 | Shader Barrier Messages (read) */
#define cflgt3__l3_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* L3_1 | Slice0 L3 Bank0 Stalled (read) */
#define cflgt3__l3_1__l30_bank0_stalled__read bdw__render_basic__sampler0_busy__read

/* L3_1 | Slice0 L3 Bank1 Stalled (read) */
#define cflgt3__l3_1__l30_bank1_stalled__read bdw__render_pipe_profile__so_bottleneck__read

/* L3_1 | Slice0 L3 Bank1 Active (read) */
#define cflgt3__l3_1__l30_bank1_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* L3_1 | Slice0 L3 Bank0 Active (read) */
#define cflgt3__l3_1__l30_bank0_active__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* L3_1 | SQ is full (read) */
#define cflgt3__l3_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 aliases for the "L3_2" metric set.
 *
 * Each macro in this span is a pure rename: the cflgt3-prefixed identifier
 * expands to an identifier carrying another platform prefix. No function is
 * defined here.
 * NOTE(review): some targets carry a different counter name than the alias
 * (e.g. l30_bank2_active -> bc_bottleneck); presumably gen_perf.py shares
 * one reader between counters with identical equations -- confirm against
 * the generator before changing any of these by hand.
 */

/* L3_2 | GPU Time Elapsed (read) */
#define cflgt3__l3_2__gpu_time__read hsw__render_basic__gpu_time__read

/* L3_2 | GPU Core Clocks (read) */
#define cflgt3__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* L3_2 | AVG GPU Core Frequency (read) */
#define cflgt3__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* L3_2 | AVG GPU Core Frequency (max) */
#define cflgt3__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* L3_2 | GPU Busy (read) */
#define cflgt3__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* L3_2 | VS Threads Dispatched (read) */
#define cflgt3__l3_2__vs_threads__read bdw__render_basic__vs_threads__read

/* L3_2 | HS Threads Dispatched (read) */
#define cflgt3__l3_2__hs_threads__read bdw__render_basic__hs_threads__read

/* L3_2 | DS Threads Dispatched (read) */
#define cflgt3__l3_2__ds_threads__read bdw__render_basic__ds_threads__read

/* L3_2 | GS Threads Dispatched (read) */
#define cflgt3__l3_2__gs_threads__read hsw__render_basic__vs_threads__read

/* L3_2 | FS Threads Dispatched (read) */
#define cflgt3__l3_2__ps_threads__read bdw__render_basic__ps_threads__read

/* L3_2 | CS Threads Dispatched (read) */
#define cflgt3__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* L3_2 | EU Active (read) */
#define cflgt3__l3_2__eu_active__read bdw__render_basic__eu_active__read

/* L3_2 | EU Stall (read) */
#define cflgt3__l3_2__eu_stall__read bdw__render_basic__eu_stall__read

/* L3_2 | EU Both FPU Pipes Active (read) */
#define cflgt3__l3_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* L3_2 | VS FPU0 Pipe Active (read) */
#define cflgt3__l3_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* L3_2 | VS FPU1 Pipe Active (read) */
#define cflgt3__l3_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* L3_2 | VS Send Pipe Active (read) */
#define cflgt3__l3_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* L3_2 | PS FPU0 Pipe Active (read) */
#define cflgt3__l3_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* L3_2 | PS FPU1 Pipe Active (read) */
#define cflgt3__l3_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* L3_2 | PS Send Pipeline Active (read) */
#define cflgt3__l3_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* L3_2 | FS Both FPU Active (read) */
#define cflgt3__l3_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* L3_2 | Rasterized Pixels (read) */
#define cflgt3__l3_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* L3_2 | Early Hi-Depth Test Fails (read) */
#define cflgt3__l3_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* L3_2 | Early Depth Test Fails (read) */
#define cflgt3__l3_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* L3_2 | Samples Killed in FS (read) */
#define cflgt3__l3_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* L3_2 | Pixels Failing Tests (read) */
#define cflgt3__l3_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* L3_2 | Samples Written (read) */
#define cflgt3__l3_2__samples_written__read bdw__render_basic__samples_written__read

/* L3_2 | Samples Blended (read) */
#define cflgt3__l3_2__samples_blended__read bdw__render_basic__samples_blended__read

/* L3_2 | Sampler Texels (read) */
#define cflgt3__l3_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* L3_2 | Sampler Texels Misses (read) */
#define cflgt3__l3_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* L3_2 | SLM Bytes Read (read) */
#define cflgt3__l3_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* L3_2 | SLM Bytes Written (read) */
#define cflgt3__l3_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* L3_2 | Shader Memory Accesses (read) */
#define cflgt3__l3_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* L3_2 | Shader Atomic Memory Accesses (read) */
#define cflgt3__l3_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3_2 | L3 Shader Throughput (read) */
#define cflgt3__l3_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* L3_2 | Shader Barrier Messages (read) */
#define cflgt3__l3_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* L3_2 | Slice0 L3 Bank2 Stalled (read) */
#define cflgt3__l3_2__l30_bank2_stalled__read bdw__render_basic__sampler0_busy__read

/* L3_2 | Slice0 L3 Bank2 Active (read) */
#define cflgt3__l3_2__l30_bank2_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* L3_2 | SQ is full (read) */
#define cflgt3__l3_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 aliases for the "L3_3" metric set.
 *
 * Each macro in this span is a pure rename: the cflgt3-prefixed identifier
 * expands to an identifier carrying another platform prefix. No function is
 * defined here.
 * NOTE(review): some targets carry a different counter name than the alias
 * (e.g. l30_bank3_active -> bc_bottleneck); presumably gen_perf.py shares
 * one reader between counters with identical equations -- confirm against
 * the generator before changing any of these by hand.
 */

/* L3_3 | GPU Time Elapsed (read) */
#define cflgt3__l3_3__gpu_time__read hsw__render_basic__gpu_time__read

/* L3_3 | GPU Core Clocks (read) */
#define cflgt3__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* L3_3 | AVG GPU Core Frequency (read) */
#define cflgt3__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* L3_3 | AVG GPU Core Frequency (max) */
#define cflgt3__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* L3_3 | GPU Busy (read) */
#define cflgt3__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* L3_3 | VS Threads Dispatched (read) */
#define cflgt3__l3_3__vs_threads__read bdw__render_basic__vs_threads__read

/* L3_3 | HS Threads Dispatched (read) */
#define cflgt3__l3_3__hs_threads__read bdw__render_basic__hs_threads__read

/* L3_3 | DS Threads Dispatched (read) */
#define cflgt3__l3_3__ds_threads__read bdw__render_basic__ds_threads__read

/* L3_3 | GS Threads Dispatched (read) */
#define cflgt3__l3_3__gs_threads__read hsw__render_basic__vs_threads__read

/* L3_3 | FS Threads Dispatched (read) */
#define cflgt3__l3_3__ps_threads__read bdw__render_basic__ps_threads__read

/* L3_3 | CS Threads Dispatched (read) */
#define cflgt3__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* L3_3 | EU Active (read) */
#define cflgt3__l3_3__eu_active__read bdw__render_basic__eu_active__read

/* L3_3 | EU Stall (read) */
#define cflgt3__l3_3__eu_stall__read bdw__render_basic__eu_stall__read

/* L3_3 | EU Both FPU Pipes Active (read) */
#define cflgt3__l3_3__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* L3_3 | VS FPU0 Pipe Active (read) */
#define cflgt3__l3_3__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* L3_3 | VS FPU1 Pipe Active (read) */
#define cflgt3__l3_3__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* L3_3 | VS Send Pipe Active (read) */
#define cflgt3__l3_3__vs_send_active__read bdw__render_basic__vs_send_active__read

/* L3_3 | PS FPU0 Pipe Active (read) */
#define cflgt3__l3_3__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* L3_3 | PS FPU1 Pipe Active (read) */
#define cflgt3__l3_3__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* L3_3 | PS Send Pipeline Active (read) */
#define cflgt3__l3_3__ps_send_active__read bdw__render_basic__ps_send_active__read

/* L3_3 | FS Both FPU Active (read) */
#define cflgt3__l3_3__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* L3_3 | Rasterized Pixels (read) */
#define cflgt3__l3_3__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* L3_3 | Early Hi-Depth Test Fails (read) */
#define cflgt3__l3_3__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* L3_3 | Early Depth Test Fails (read) */
#define cflgt3__l3_3__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* L3_3 | Samples Killed in FS (read) */
#define cflgt3__l3_3__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* L3_3 | Pixels Failing Tests (read) */
#define cflgt3__l3_3__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* L3_3 | Samples Written (read) */
#define cflgt3__l3_3__samples_written__read bdw__render_basic__samples_written__read

/* L3_3 | Samples Blended (read) */
#define cflgt3__l3_3__samples_blended__read bdw__render_basic__samples_blended__read

/* L3_3 | Sampler Texels (read) */
#define cflgt3__l3_3__sampler_texels__read bdw__render_basic__sampler_texels__read

/* L3_3 | Sampler Texels Misses (read) */
#define cflgt3__l3_3__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* L3_3 | SLM Bytes Read (read) */
#define cflgt3__l3_3__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* L3_3 | SLM Bytes Written (read) */
#define cflgt3__l3_3__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* L3_3 | Shader Memory Accesses (read) */
#define cflgt3__l3_3__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* L3_3 | Shader Atomic Memory Accesses (read) */
#define cflgt3__l3_3__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3_3 | L3 Shader Throughput (read) */
#define cflgt3__l3_3__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* L3_3 | Shader Barrier Messages (read) */
#define cflgt3__l3_3__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* L3_3 | Slice0 L3 Bank3 Stalled (read) */
#define cflgt3__l3_3__l30_bank3_stalled__read bdw__render_basic__sampler0_busy__read

/* L3_3 | Slice0 L3 Bank3 Active (read) */
#define cflgt3__l3_3__l30_bank3_active__read bdw__render_pipe_profile__bc_bottleneck__read

/* L3_3 | SQ is full (read) */
#define cflgt3__l3_3__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 aliases for the "RasterizerAndPixelBackend" metric set.
 *
 * Each macro in this span is a pure rename: the cflgt3-prefixed identifier
 * expands to an identifier carrying another platform prefix. No function is
 * defined here.
 * NOTE(review): some targets carry a different counter name than the alias
 * (e.g. rasterizer0_input_available -> sampler1_busy); presumably
 * gen_perf.py shares one reader between counters with identical equations --
 * confirm against the generator before changing any of these by hand.
 */

/* RasterizerAndPixelBackend | GPU Time Elapsed (read) */
#define cflgt3__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* RasterizerAndPixelBackend | GPU Core Clocks (read) */
#define cflgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* RasterizerAndPixelBackend | AVG GPU Core Frequency (read) */
#define cflgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* RasterizerAndPixelBackend | AVG GPU Core Frequency (max) */
#define cflgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* RasterizerAndPixelBackend | GPU Busy (read) */
#define cflgt3__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* RasterizerAndPixelBackend | VS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend | HS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* RasterizerAndPixelBackend | DS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* RasterizerAndPixelBackend | GS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* RasterizerAndPixelBackend | FS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* RasterizerAndPixelBackend | CS Threads Dispatched (read) */
#define cflgt3__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* RasterizerAndPixelBackend | EU Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__eu_active__read bdw__render_basic__eu_active__read

/* RasterizerAndPixelBackend | EU Stall (read) */
#define cflgt3__rasterizer_and_pixel_backend__eu_stall__read bdw__render_basic__eu_stall__read

/* RasterizerAndPixelBackend | EU Both FPU Pipes Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* RasterizerAndPixelBackend | VS FPU0 Pipe Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* RasterizerAndPixelBackend | VS FPU1 Pipe Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* RasterizerAndPixelBackend | VS Send Pipe Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__vs_send_active__read bdw__render_basic__vs_send_active__read

/* RasterizerAndPixelBackend | PS FPU0 Pipe Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* RasterizerAndPixelBackend | PS FPU1 Pipe Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* RasterizerAndPixelBackend | PS Send Pipeline Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_send_active__read bdw__render_basic__ps_send_active__read

/* RasterizerAndPixelBackend | FS Both FPU Active (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* RasterizerAndPixelBackend | Rasterized Pixels (read) */
#define cflgt3__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* RasterizerAndPixelBackend | Early Hi-Depth Test Fails (read) */
#define cflgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* RasterizerAndPixelBackend | Early Depth Test Fails (read) */
#define cflgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend | Samples Killed in FS (read) */
#define cflgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* RasterizerAndPixelBackend | Pixels Failing Tests (read) */
#define cflgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* RasterizerAndPixelBackend | Samples Written (read) */
#define cflgt3__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* RasterizerAndPixelBackend | Samples Blended (read) */
#define cflgt3__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* RasterizerAndPixelBackend | Sampler Texels (read) */
#define cflgt3__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* RasterizerAndPixelBackend | Sampler Texels Misses (read) */
#define cflgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* RasterizerAndPixelBackend | SLM Bytes Read (read) */
#define cflgt3__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* RasterizerAndPixelBackend | SLM Bytes Written (read) */
#define cflgt3__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* RasterizerAndPixelBackend | Shader Memory Accesses (read) */
#define cflgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* RasterizerAndPixelBackend | Shader Atomic Memory Accesses (read) */
#define cflgt3__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* RasterizerAndPixelBackend | L3 Shader Throughput (read) */
#define cflgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* RasterizerAndPixelBackend | Shader Barrier Messages (read) */
#define cflgt3__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* RasterizerAndPixelBackend | Slice0 Post-EarlyZ Pixel Data Ready (read) */
#define cflgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read bdw__render_basic__sampler0_busy__read

/* RasterizerAndPixelBackend | Slice0 Rasterizer Input Available (read) */
#define cflgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_basic__sampler1_busy__read

/* RasterizerAndPixelBackend | Slice0 PS Output Available (read) */
#define cflgt3__rasterizer_and_pixel_backend__ps_output0_available__read bdw__render_pipe_profile__bc_bottleneck__read

/* RasterizerAndPixelBackend | Slice0 Pixel Values Ready (read) */
#define cflgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* RasterizerAndPixelBackend | Slice0 Rasterizer Output Ready (read) */
#define cflgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* RasterizerAndPixelBackend | SQ is full (read) */
#define cflgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 "Sampler" metric set: counter accessor aliases.
 *
 * Each macro below makes a cflgt3__sampler__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 *
 * NOTE(review): several targets are named after a different counter (e.g.
 * gs_threads expands to hsw__render_basic__vs_threads__read).  Presumably
 * gen_perf.py shares accessors whose equations are identical, so do not
 * "correct" these by name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__sampler__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt3__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt3__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt3__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt3__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt3__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt3__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt3__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt3__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__sampler__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Subslice1 Input Available */
#define cflgt3__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Slice0 Subslice2 Input Available */
#define cflgt3__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Slice0 Subslice0 Input Available */
#define cflgt3__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 Subslice2 Sampler Output Ready */
#define cflgt3__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Slice0 Subslice0 Sampler Output Ready */
#define cflgt3__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Slice0 Subslice1 Sampler Output Ready */
#define cflgt3__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* SQ is full */
#define cflgt3__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 "TDL_1" metric set: counter accessor aliases.
 *
 * Each macro below makes a cflgt3__tdl_1__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 *
 * NOTE(review): several targets are named after a different counter (e.g.
 * ps_thread00_ready_for_dispatch expands to
 * bdw__render_basic__sampler1_busy__read).  Presumably gen_perf.py shares
 * accessors whose equations are identical, so do not "correct" these by
 * name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt3__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt3__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt3__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt3__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt3__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt3__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt3__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt3__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define cflgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define cflgt3__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define cflgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define cflgt3__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define cflgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define cflgt3__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* SQ is full */
#define cflgt3__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 "TDL_2" metric set: counter accessor aliases.
 *
 * Each macro below makes a cflgt3__tdl_2__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 *
 * NOTE(review): several targets are named after a different counter (e.g.
 * thread_header02_ready_port1 expands to
 * bdw__render_pipe_profile__cl_stall__read).  Presumably gen_perf.py shares
 * accessors whose equations are identical, so do not "correct" these by
 * name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt3__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt3__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define cflgt3__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define cflgt3__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define cflgt3__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define cflgt3__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define cflgt3__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define cflgt3__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define cflgt3__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define cflgt3__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define cflgt3__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define cflgt3__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define cflgt3__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define cflgt3__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define cflgt3__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define cflgt3__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define cflgt3__tdl_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define cflgt3__tdl_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define cflgt3__tdl_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define cflgt3__tdl_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define cflgt3__tdl_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define cflgt3__tdl_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Thread Header Ready on Slice0 Subslice1 Port 0 */
#define cflgt3__tdl_2__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* Thread Header Ready on Slice0 Subslice0 Port 1 */
#define cflgt3__tdl_2__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Thread Header Ready on Slice0 Subslice0 Port 0 */
#define cflgt3__tdl_2__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* Thread Header Ready on Slice0 Subslice2 Port 1 */
#define cflgt3__tdl_2__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* Thread Header Ready on Slice0 Subslice2 Port 0 */
#define cflgt3__tdl_2__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* Thread Header Ready on Slice0 Subslice1 Port 1 */
#define cflgt3__tdl_2__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* SQ is full */
#define cflgt3__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * cflgt3 "Compute Metrics Extra set": counter accessor aliases.
 *
 * Each macro below makes a cflgt3__compute_extra__* accessor name expand to
 * an accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above
 * each macro is the counter's display name.
 *
 * NOTE(review): fpu1_active expands to bdw__render_basic__eu_stall__read.
 * Presumably gen_perf.py shares accessors whose equations are identical, so
 * do not "correct" this by name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EU FPU1 Pipe Active */
#define cflgt3__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* EU FPU1 Pipe Active including Ext Math */
#define cflgt3__compute_extra__fpu1_active_adjusted__read bdw__compute_extra__fpu1_active_adjusted__read

/*
 * cflgt3 "Media Vme Pipe metrics set": counter accessor aliases.
 *
 * Each macro below makes a cflgt3__vme_pipe__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 */

/* GPU Time Elapsed */
#define cflgt3__vme_pipe__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__vme_pipe__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__vme_pipe__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__vme_pipe__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__vme_pipe__gpu_busy__read bdw__render_basic__gpu_busy__read

/* CS Threads Dispatched */
#define cflgt3__vme_pipe__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define cflgt3__vme_pipe__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__vme_pipe__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define cflgt3__vme_pipe__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU Thread Occupancy */
#define cflgt3__vme_pipe__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VME Busy */
#define cflgt3__vme_pipe__vme_busy__read bdw__vme_pipe__vme_busy__read

/*
 * cflgt3 "Gpu Rings Busyness metrics set": counter accessor aliases.
 *
 * Each macro below makes a cflgt3__gpu_busyness__* accessor name expand to
 * an accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above
 * each macro is the counter's display name.
 *
 * NOTE(review): the ring-busy counters expand to render_pipe_profile /
 * render_basic accessors with unrelated names.  Presumably gen_perf.py shares
 * accessors whose equations are identical, so do not "correct" these by
 * name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Render Ring Busy */
#define cflgt3__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Vdbox0 Ring Busy */
#define cflgt3__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Vdbox1 Ring Busy */
#define cflgt3__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__sf_stall__read

/* Vebox Ring Busy */
#define cflgt3__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__cl_stall__read

/* Blitter Ring Busy */
#define cflgt3__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__so_stall__read

/* AnyRingBusy */
#define cflgt3__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/*
 * cflgt3 "MDAPI testing set": counter accessor aliases.
 *
 * Each macro below makes a cflgt3__test_oa__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 *
 * NOTE(review): TestCounter0..8 expand to hsw__compute_extended__* and
 * hsw__memory_reads__* accessors with unrelated names.  Presumably
 * gen_perf.py shares accessors whose equations are identical, so do not
 * "correct" these by name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TestCounter0 */
#define cflgt3__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* TestCounter1 */
#define cflgt3__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* TestCounter2 */
#define cflgt3__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* TestCounter3 */
#define cflgt3__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* TestCounter4 */
#define cflgt3__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* TestCounter5 */
#define cflgt3__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* TestCounter6 */
#define cflgt3__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* TestCounter7 */
#define cflgt3__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* TestCounter8 */
#define cflgt3__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/*
 * cflgt3 "PMA Stall" metric set: counter accessor aliases.
 *
 * Each macro below makes a cflgt3__pma__stall__* accessor name expand to an
 * accessor symbol carrying an hsw__, bdw__ or sklgt2__ prefix.  The comment
 * above each macro is the counter's display name.
 */

/* GPU Time Elapsed */
#define cflgt3__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* STC PMA stall */
#define cflgt3__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/*
 * cflgt3 "AsyncCompute metrics set": counter accessor aliases.
 *
 * Each macro below makes a cflgt3__async_compute__* accessor name expand to
 * an accessor symbol carrying an hsw__, bdw__ or sklgt2__ prefix.  The
 * comment above each macro is the counter's display name.
 *
 * NOTE(review): the FPU0/FPU1 counters expand to accessors named after
 * different counters (e.g. ps_fpu1_active expands to
 * bdw__render_basic__vs_send_active__read).  Presumably gen_perf.py shares
 * accessors whose equations are identical, so do not "correct" these by
 * name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define cflgt3__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define cflgt3__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define cflgt3__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define cflgt3__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define cflgt3__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define cflgt3__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define cflgt3__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define cflgt3__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define cflgt3__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define cflgt3__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define cflgt3__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU0 Pipe Active */
#define cflgt3__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define cflgt3__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* PS FPU0 Pipe Active */
#define cflgt3__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* CS FPU0 Pipe Active */
#define cflgt3__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* EU FPU1 Pipe Active */
#define cflgt3__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define cflgt3__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* PS FPU1 Pipe Active */
#define cflgt3__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* CS FPU1 Pipe Active */
#define cflgt3__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* EU Thread Occupancy */
#define cflgt3__async_compute__eu_thread_occupancy__read sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* EU Active */
#define cflgt3__async_compute__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define cflgt3__async_compute__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * bxt "Render Metrics Basic set": counter accessor aliases.
 *
 * Each macro below makes a bxt__render_basic__* accessor name expand to an
 * accessor symbol carrying an hsw__ or bdw__ prefix.  The comment above each
 * macro is the counter's display name.
 *
 * NOTE(review): gs_threads expands to hsw__render_basic__vs_threads__read.
 * Presumably gen_perf.py shares accessors whose equations are identical, so
 * do not "correct" this by name -- confirm against the generator.
 */

/* GPU Time Elapsed */
#define bxt__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define bxt__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define bxt__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define bxt__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched */
#define bxt__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define bxt__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define bxt__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define bxt__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define bxt__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define bxt__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy */
#define bxt__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define bxt__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define bxt__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define bxt__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define bxt__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* VS FPU1 Pipe Active */
#define bxt__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* VS Send Pipe Active */
#define bxt__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* PS FPU0 Pipe Active */
#define bxt__render_basic__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* PS FPU1 Pipe Active */
#define bxt__render_basic__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* PS Send Pipeline Active */
#define bxt__render_basic__ps_send_active__read bdw__render_basic__ps_send_active__read

/* FS Both FPU Active */
#define bxt__render_basic__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler 0 Busy */
#define bxt__render_basic__sampler0_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler 1 Busy */
#define bxt__render_basic__sampler1_busy__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy */
#define bxt__render_basic__samplers_busy__read bdw__render_basic__samplers_busy__read

/* Sampler 0 Bottleneck */
#define bxt__render_basic__sampler0_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler 1 Bottleneck */
#define bxt__render_basic__sampler1_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* Rasterized Pixels */
#define bxt__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define bxt__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define bxt__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define bxt__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define bxt__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define bxt__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define bxt__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define bxt__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define bxt__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses */
#define bxt__render_basic__sampler_l1_misses__read \
   sklgt2__render_basic__sampler_l1_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define bxt__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define bxt__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define bxt__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define bxt__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC */
#define bxt__render_basic__l3_lookups__read \
   sklgt2__render_basic__l3_lookups__read

/* Render Metrics Basic set :: L3 Misses */
#define bxt__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput */
#define bxt__render_basic__l3_sampler_throughput__read \
   sklgt2__render_basic__l3_sampler_throughput__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define bxt__render_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define bxt__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput */
#define bxt__render_basic__gti_vf_throughput__read \
   bdw__render_basic__gti_vf_throughput__read

/* Render Metrics Basic set :: GTI Depth Throughput */
#define bxt__render_basic__gti_depth_throughput__read \
   bdw__render_basic__gti_depth_throughput__read

/* Render Metrics Basic set :: GTI RCC Throughput */
#define bxt__render_basic__gti_rcc_throughput__read \
   bdw__render_basic__gti_rcc_throughput__read

/* Render Metrics Basic set :: GTI L3 Throughput */
#define bxt__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput */
#define bxt__render_basic__gti_hdc_lookups_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define bxt__render_basic__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define bxt__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define bxt__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler_bottleneck__read

/*
 * Compute Metrics Basic set (bxt): reader aliases.
 *
 * Every macro in this run maps a bxt__compute_basic__* name onto a reader that
 * carries an hsw__ or bdw__ prefix.  The target's metric name does not always
 * match the alias name; presumably the generator reuses any earlier reader
 * with an identical equation -- confirm against gen_perf.py before
 * "correcting" a target by hand.
 */

/* GPU Time Elapsed */
#define bxt__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define bxt__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define bxt__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define bxt__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define bxt__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define bxt__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define bxt__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define bxt__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define bxt__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define bxt__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define bxt__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define bxt__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define bxt__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define bxt__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define bxt__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define bxt__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define bxt__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define bxt__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define bxt__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define bxt__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define bxt__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define bxt__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define bxt__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define bxt__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define bxt__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define bxt__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define bxt__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define bxt__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define bxt__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define bxt__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define bxt__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define bxt__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define bxt__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define bxt__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Typed Bytes Read */
#define bxt__compute_basic__typed_bytes_read__read hsw__compute_basic__untyped_bytes_read__read

/* Compute Metrics Basic set :: Typed Bytes Written
 *
 * Adds the accumulated B counters at indices 3 and 4 (relative to
 * query->b_offset) and multiplies the sum by (n_eu_slices * 64).
 *
 * Note: perf carries the UNUSED marker like the other readers in this file,
 * but it is dereferenced here to fetch sys_vars.n_eu_slices.
 */
static uint64_t
bxt__compute_basic__typed_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 3 READ B 4 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;
   const uint64_t counter_sum = b3 + b4;

   return counter_sum * scale;
}

/*
 * Compute Metrics Basic set :: Untyped Bytes Read
 *
 * Alias only: expands to the hsw "typed_bytes_written" reader.  The name
 * mismatch looks intentional (presumably the generator reuses a reader with
 * the same equation) -- confirm against gen_perf.py before changing it.
 */
#define bxt__compute_basic__untyped_bytes_read__read hsw__compute_basic__typed_bytes_written__read

/* Compute Metrics Basic set :: Untyped Writes
 *
 * Adds the accumulated C counters at indices 1 and 2 (relative to
 * query->c_offset) and multiplies the sum by (n_eu_slices * 64).
 *
 * Note: perf carries the UNUSED marker like the other readers in this file,
 * but it is dereferenced here to fetch sys_vars.n_eu_slices.
 */
static uint64_t
bxt__compute_basic__untyped_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: C 1 READ C 2 READ UADD $EuSlicesTotalCount 64 UMUL UMUL */
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t scale = perf->sys_vars.n_eu_slices * 64;
   const uint64_t counter_sum = c1 + c2;

   return counter_sum * scale;
}

/*
 * Compute Metrics Basic set (bxt): GTI throughput reader aliases.
 *
 * Both macros expand to readers whose names belong to other metrics;
 * presumably the generator matched them by equation -- confirm against
 * gen_perf.py before changing a target.
 */

/* GTI Read Throughput */
#define bxt__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI Write Throughput */
#define bxt__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * Render Metrics set for 3D Pipeline Profile (bxt): reader aliases.
 *
 * Every macro in this run maps a bxt__render_pipe_profile__* name onto a
 * reader that carries an hsw__ or bdw__ prefix.  The target's metric name does
 * not always match the alias name (e.g. the VS/HS/DS/GS bottleneck aliases
 * expand to sampler readers); presumably the generator reuses any earlier
 * reader with an identical equation -- confirm against gen_perf.py before
 * "correcting" a target by hand.
 */

/* GPU Time Elapsed */
#define bxt__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define bxt__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define bxt__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define bxt__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define bxt__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define bxt__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define bxt__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define bxt__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define bxt__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define bxt__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define bxt__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define bxt__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define bxt__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define bxt__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define bxt__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define bxt__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define bxt__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define bxt__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define bxt__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define bxt__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define bxt__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define bxt__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define bxt__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define bxt__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define bxt__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define bxt__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define bxt__render_pipe_profile__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define bxt__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck */
#define bxt__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define bxt__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define bxt__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* DS Bottleneck */
#define bxt__render_pipe_profile__ds_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* GS Bottleneck */
#define bxt__render_pipe_profile__gs_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* SO Bottleneck */
#define bxt__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* Clipper Bottleneck */
#define bxt__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* Strip-Fans Bottleneck */
#define bxt__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__sf_bottleneck__read

/* Hi-Depth Bottleneck */
#define bxt__render_pipe_profile__hi_depth_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Early Depth Bottleneck */
#define bxt__render_pipe_profile__early_depth_bottleneck__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* BC Bottleneck */
#define bxt__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* HS Stall */
#define bxt__render_pipe_profile__hs_stall__read bdw__render_pipe_profile__hs_stall__read

/* DS Stall */
#define bxt__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__ds_stall__read

/* SO Stall */
#define bxt__render_pipe_profile__so_stall__read bdw__render_pipe_profile__so_stall__read

/* CL Stall */
#define bxt__render_pipe_profile__cl_stall__read bdw__render_pipe_profile__cl_stall__read

/* SF Stall */
#define bxt__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_stall__read

/*
 * Memory Reads Distribution metric set (bxt): reader aliases.
 *
 * Every macro in this run maps a bxt__memory_reads__* name onto a reader that
 * carries an hsw__ or bdw__ prefix.  The target's metric name frequently does
 * not match the alias name (the Gti* aliases expand to hsw compute_extended /
 * memory_reads readers); presumably the generator reuses any earlier reader
 * with an identical equation -- confirm against gen_perf.py before
 * "correcting" a target by hand.
 */

/* GPU Time Elapsed */
#define bxt__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define bxt__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define bxt__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define bxt__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define bxt__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define bxt__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define bxt__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define bxt__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define bxt__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define bxt__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define bxt__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define bxt__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define bxt__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define bxt__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define bxt__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define bxt__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define bxt__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define bxt__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define bxt__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define bxt__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define bxt__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define bxt__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define bxt__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define bxt__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define bxt__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define bxt__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define bxt__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryReads */
#define bxt__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define bxt__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define bxt__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define bxt__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define bxt__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define bxt__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define bxt__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define bxt__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define bxt__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define bxt__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define bxt__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define bxt__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define bxt__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define bxt__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define bxt__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * Memory Writes Distribution metric set (bxt): reader aliases.
 *
 * Every macro in this run maps a bxt__memory_writes__* name onto a reader that
 * carries an hsw__ or bdw__ prefix.  The target's metric name frequently does
 * not match the alias name (the Gti*Writes aliases expand to hsw
 * compute_extended / memory_reads readers and a bdw memory_reads reader);
 * presumably the generator reuses any earlier reader with an identical
 * equation -- confirm against gen_perf.py before "correcting" a target by
 * hand.
 */

/* GPU Time Elapsed */
#define bxt__memory_writes__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define bxt__memory_writes__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read variant) */
#define bxt__memory_writes__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max variant) */
#define bxt__memory_writes__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define bxt__memory_writes__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define bxt__memory_writes__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define bxt__memory_writes__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define bxt__memory_writes__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define bxt__memory_writes__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define bxt__memory_writes__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define bxt__memory_writes__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define bxt__memory_writes__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define bxt__memory_writes__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define bxt__memory_writes__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define bxt__memory_writes__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define bxt__memory_writes__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define bxt__memory_writes__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define bxt__memory_writes__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define bxt__memory_writes__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define bxt__memory_writes__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define bxt__memory_writes__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define bxt__memory_writes__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define bxt__memory_writes__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define bxt__memory_writes__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define bxt__memory_writes__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define bxt__memory_writes__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define bxt__memory_writes__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define bxt__memory_writes__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryWrites */
#define bxt__memory_writes__gti_cmd_streamer_memory_writes__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiSoMemoryWrites */
#define bxt__memory_writes__gti_so_memory_writes__read hsw__compute_extended__eu_typed_reads0__read

/* GtiRccMemoryWrites */
#define bxt__memory_writes__gti_rcc_memory_writes__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryWrites */
#define bxt__memory_writes__gti_msc_memory_writes__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryWrites */
#define bxt__memory_writes__gti_hiz_memory_writes__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryWrites */
#define bxt__memory_writes__gti_stc_memory_writes__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryWrites */
#define bxt__memory_writes__gti_rcz_memory_writes__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryWrites */
#define bxt__memory_writes__gti_memory_writes__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Writes */
#define bxt__memory_writes__gti_l3_bank0_writes__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Writes */
#define bxt__memory_writes__gti_l3_bank1_writes__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Writes */
#define bxt__memory_writes__gti_l3_bank2_writes__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Writes */
#define bxt__memory_writes__gti_l3_bank3_writes__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Writes */
#define bxt__memory_writes__gti_l3_writes__read bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metric set :: GtiRingAccesses */
#define bxt__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * bxt "Compute Metrics Extended" metric set.
 *
 * Every macro in this section is a plain alias: the
 * bxt__compute_extended__<counter>__read / __max name expands to an
 * identifier that carries an hsw__ or bdw__ prefix instead.
 *
 * NOTE(review): some targets are named after a different counter (e.g.
 * shader_barriers -> early_depth_test_fails, typed_reads0 ->
 * hsw__render_basic__gpu_core_clocks). Presumably the generator reuses any
 * earlier definition with an identical equation regardless of its name --
 * confirm against gen_perf.py.
 */

/* Compute Metrics Extended metric set :: GPU Time Elapsed */
#define bxt__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended metric set :: GPU Core Clocks */
#define bxt__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metric set :: AVG GPU Core Frequency */
#define bxt__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended metric set :: AVG GPU Core Frequency (__max alias) */
#define bxt__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended metric set :: CS Threads Dispatched */
#define bxt__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended metric set :: EU Active */
#define bxt__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended metric set :: EU Stall */
#define bxt__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended metric set :: EU Both FPU Pipes Active */
#define bxt__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended metric set :: EU FPU0 Pipe Active */
#define bxt__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended metric set :: EU FPU1 Pipe Active */
#define bxt__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended metric set :: EU AVG IPC Rate */
#define bxt__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended metric set :: EU Send Pipe Active */
#define bxt__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended metric set :: EU Thread Occupancy */
#define bxt__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended metric set :: Sampler Texels */
#define bxt__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended metric set :: Sampler Texels Misses */
#define bxt__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended metric set :: SLM Bytes Read */
#define bxt__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended metric set :: SLM Bytes Written */
#define bxt__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended metric set :: Shader Memory Accesses */
#define bxt__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended metric set :: Shader Atomic Memory Accesses */
#define bxt__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended metric set :: L3 Shader Throughput */
#define bxt__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended metric set :: Shader Barrier Messages */
#define bxt__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended metric set :: EuUntypedReads0 */
#define bxt__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended metric set :: EuTypedReads0 */
#define bxt__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended metric set :: EuUntypedWrites0 */
#define bxt__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended metric set :: EuTypedWrites0 */
#define bxt__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended metric set :: EuUntypedAtomics0 */
#define bxt__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended metric set :: EuTypedAtomics0 */
#define bxt__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended metric set :: EuA64UntypedReads0 */
#define bxt__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended metric set :: EuA64UntypedWrites0 */
#define bxt__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended metric set :: Typed Reads 0 */
#define bxt__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended metric set :: Typed Writes 0 */
#define bxt__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended metric set :: Untyped Reads 0 */
#define bxt__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended metric set :: Untyped Writes 0 */
#define bxt__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended metric set :: Typed Atomics 0 */
#define bxt__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended metric set :: TypedReadsPerCacheLine */
#define bxt__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended metric set :: TypedWritesPerCacheLine */
#define bxt__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended metric set :: UntypedReadsPerCacheLine */
#define bxt__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended metric set :: UntypedWritesPerCacheLine */
#define bxt__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended metric set :: TypedAtomicsPerCacheLine */
#define bxt__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * bxt "Compute Metrics L3 Cache" metric set.
 *
 * Every macro in this section is a plain alias: the
 * bxt__compute_l3_cache__<counter>__read / __max name expands to an
 * identifier that carries an hsw__, bdw__, chv__ or sklgt2__ prefix instead.
 *
 * NOTE(review): some targets are named after a different counter (e.g.
 * gs_threads -> hsw__render_basic__vs_threads, eu_ternary_fpu0_instruction ->
 * bdw__render_basic__ps_fpu0_active). Presumably the generator reuses any
 * earlier definition with an identical equation regardless of its name --
 * confirm against gen_perf.py.
 */

/* Compute Metrics L3 Cache metric set :: GPU Time Elapsed */
#define bxt__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache metric set :: GPU Core Clocks */
#define bxt__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache metric set :: AVG GPU Core Frequency */
#define bxt__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache metric set :: AVG GPU Core Frequency (__max alias) */
#define bxt__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache metric set :: GPU Busy */
#define bxt__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache metric set :: VS Threads Dispatched */
#define bxt__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metric set :: HS Threads Dispatched */
#define bxt__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache metric set :: DS Threads Dispatched */
#define bxt__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache metric set :: GS Threads Dispatched */
#define bxt__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metric set :: FS Threads Dispatched */
#define bxt__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache metric set :: CS Threads Dispatched */
#define bxt__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache metric set :: EU Active */
#define bxt__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache metric set :: EU Stall */
#define bxt__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache metric set :: EU Both FPU Pipes Active */
#define bxt__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Pipe Active */
#define bxt__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Pipe Active */
#define bxt__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache metric set :: EU AVG IPC Rate */
#define bxt__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache metric set :: EU Send Pipe Active */
#define bxt__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Hybrid Instruction */
#define bxt__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Hybrid Instruction */
#define bxt__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Ternary Instruction */
#define bxt__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Ternary Instruction */
#define bxt__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Binary Instruction */
#define bxt__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Binary Instruction */
#define bxt__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache metric set :: EU FPU0 Move Instruction */
#define bxt__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache metric set :: EU FPU1 Move Instruction */
#define bxt__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache metric set :: Rasterized Pixels */
#define bxt__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache metric set :: Early Hi-Depth Test Fails */
#define bxt__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: Early Depth Test Fails */
#define bxt__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: Samples Killed in FS */
#define bxt__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache metric set :: Pixels Failing Tests */
#define bxt__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache metric set :: Samples Written */
#define bxt__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache metric set :: Samples Blended */
#define bxt__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache metric set :: Sampler Accesses */
#define bxt__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache metric set :: Sampler Texels */
#define bxt__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache metric set :: Sampler Texels Misses */
#define bxt__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache metric set :: SLM Bytes Read */
#define bxt__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache metric set :: SLM Bytes Written */
#define bxt__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache metric set :: Shader Memory Accesses */
#define bxt__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache metric set :: Shader Atomic Memory Accesses */
#define bxt__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache metric set :: L3 Accesses */
#define bxt__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Misses */
#define bxt__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache metric set :: L3 Sampler Throughput */
#define bxt__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache metric set :: L3 Shader Throughput */
#define bxt__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache metric set :: L3 Total Throughput */
#define bxt__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache metric set :: Shader Barrier Messages */
#define bxt__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 Accesses */
#define bxt__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 01 Accesses */
#define bxt__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 02 Accesses */
#define bxt__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 03 Accesses */
#define bxt__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 IC Accesses */
#define bxt__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache metric set :: L3 Bank 00 IC Hits */
#define bxt__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache metric set :: GTI L3 Throughput */
#define bxt__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache metric set :: GTI Read Throughput */
#define bxt__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache metric set :: GTI Write Throughput */
#define bxt__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * bxt "HDCAndSF" metric set.
 *
 * Every macro in this section is a plain alias: the
 * bxt__hdc_and_sf__<counter>__read / __max name expands to an identifier
 * that carries an hsw__ or bdw__ prefix instead.
 *
 * NOTE(review): some targets are named after a different counter (e.g.
 * poly_data_ready -> bdw__render_basic__sampler0_busy, and the "s0.ss0"
 * non_sampler_shader00 alias expands to the bdw ...shader02... identifier).
 * Presumably the generator reuses any earlier definition with an identical
 * equation regardless of its name -- confirm against gen_perf.py.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define bxt__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define bxt__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define bxt__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max alias) */
#define bxt__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define bxt__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define bxt__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define bxt__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define bxt__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define bxt__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define bxt__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define bxt__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define bxt__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define bxt__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define bxt__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define bxt__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define bxt__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define bxt__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define bxt__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define bxt__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define bxt__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define bxt__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define bxt__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define bxt__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define bxt__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define bxt__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define bxt__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define bxt__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define bxt__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define bxt__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define bxt__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define bxt__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define bxt__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define bxt__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define bxt__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define bxt__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define bxt__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define bxt__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define bxt__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define bxt__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define bxt__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "L3_1" metric set.
 *
 * Every macro in this section is a plain alias: the
 * bxt__l3_1__<counter>__read / __max name expands to an identifier that
 * carries an hsw__ or bdw__ prefix instead.
 *
 * NOTE(review): some targets are named after a different counter; in
 * particular the four Slice0 L3 bank stalled/active aliases expand to
 * bdw__render_basic__sampler0_busy and bdw__render_pipe_profile__*_bottleneck
 * identifiers. Presumably the generator reuses any earlier definition with an
 * identical equation regardless of its name -- confirm against gen_perf.py.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define bxt__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define bxt__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define bxt__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max alias) */
#define bxt__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define bxt__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define bxt__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define bxt__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define bxt__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define bxt__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define bxt__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define bxt__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define bxt__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define bxt__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define bxt__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define bxt__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define bxt__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define bxt__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define bxt__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define bxt__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define bxt__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define bxt__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define bxt__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define bxt__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define bxt__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define bxt__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define bxt__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define bxt__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define bxt__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define bxt__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define bxt__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define bxt__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define bxt__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define bxt__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define bxt__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define bxt__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define bxt__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define bxt__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define bxt__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define bxt__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define bxt__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define bxt__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "RasterizerAndPixelBackend" metric set (aliases continue below).
 *
 * Each macro is a plain alias: the
 * bxt__rasterizer_and_pixel_backend__<counter>__read / __max name expands to
 * an identifier that carries an hsw__ or bdw__ prefix instead.
 *
 * NOTE(review): gs_threads expands to hsw__render_basic__vs_threads, a
 * differently named counter. Presumably the generator reuses any earlier
 * definition with an identical equation regardless of its name -- confirm
 * against gen_perf.py.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define bxt__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define bxt__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define bxt__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max alias) */
#define bxt__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define bxt__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define bxt__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define bxt__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define bxt__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define bxt__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define bxt__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define bxt__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define bxt__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define bxt__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define bxt__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define bxt__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define bxt__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define bxt__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define bxt__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define bxt__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define bxt__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define bxt__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define bxt__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define bxt__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define bxt__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define bxt__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define bxt__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define bxt__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define bxt__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define bxt__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define bxt__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define bxt__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define bxt__rasterizer_and_pixel_backend__pixel_data0_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define bxt__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define bxt__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define bxt__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define bxt__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define bxt__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "Sampler" metric set.
 *
 * Nothing is implemented in this run: every bxt__sampler__* identifier is a
 * preprocessor alias that expands to a helper carrying an hsw__ or bdw__
 * prefix.  The target's name does not always match the counter it serves
 * (e.g. sampler02_output_ready expands to a bc_bottleneck helper);
 * presumably the generator reuses helpers whose equations are identical --
 * confirm against the generator before changing a mapping by hand.
 */

/* Sampler: GPU Time Elapsed */
#define bxt__sampler__gpu_time__read hsw__render_basic__gpu_time__read

/* Sampler: GPU Core Clocks */
#define bxt__sampler__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Sampler: AVG GPU Core Frequency (__read) */
#define bxt__sampler__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler: AVG GPU Core Frequency (__max) */
#define bxt__sampler__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler: GPU Busy */
#define bxt__sampler__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Sampler: VS Threads Dispatched */
#define bxt__sampler__vs_threads__read bdw__render_basic__vs_threads__read

/* Sampler: HS Threads Dispatched */
#define bxt__sampler__hs_threads__read bdw__render_basic__hs_threads__read

/* Sampler: DS Threads Dispatched */
#define bxt__sampler__ds_threads__read bdw__render_basic__ds_threads__read

/* Sampler: GS Threads Dispatched */
#define bxt__sampler__gs_threads__read hsw__render_basic__vs_threads__read

/* Sampler: FS Threads Dispatched */
#define bxt__sampler__ps_threads__read bdw__render_basic__ps_threads__read

/* Sampler: CS Threads Dispatched */
#define bxt__sampler__cs_threads__read bdw__render_basic__cs_threads__read

/* Sampler: EU Active */
#define bxt__sampler__eu_active__read bdw__render_basic__eu_active__read

/* Sampler: EU Stall */
#define bxt__sampler__eu_stall__read bdw__render_basic__eu_stall__read

/* Sampler: EU Both FPU Pipes Active */
#define bxt__sampler__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Sampler: VS FPU0 Pipe Active */
#define bxt__sampler__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Sampler: VS FPU1 Pipe Active */
#define bxt__sampler__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Sampler: VS Send Pipe Active */
#define bxt__sampler__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Sampler: PS FPU0 Pipe Active */
#define bxt__sampler__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* Sampler: PS FPU1 Pipe Active */
#define bxt__sampler__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* Sampler: PS Send Pipeline Active */
#define bxt__sampler__ps_send_active__read bdw__render_basic__ps_send_active__read

/* Sampler: FS Both FPU Active */
#define bxt__sampler__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Sampler: Rasterized Pixels */
#define bxt__sampler__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Sampler: Early Hi-Depth Test Fails */
#define bxt__sampler__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Sampler: Early Depth Test Fails */
#define bxt__sampler__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Sampler: Samples Killed in FS */
#define bxt__sampler__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Sampler: Pixels Failing Tests */
#define bxt__sampler__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Sampler: Samples Written */
#define bxt__sampler__samples_written__read bdw__render_basic__samples_written__read

/* Sampler: Samples Blended */
#define bxt__sampler__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler: Sampler Texels */
#define bxt__sampler__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler: Sampler Texels Misses */
#define bxt__sampler__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Sampler: SLM Bytes Read */
#define bxt__sampler__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Sampler: SLM Bytes Written */
#define bxt__sampler__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Sampler: Shader Memory Accesses */
#define bxt__sampler__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Sampler: Shader Atomic Memory Accesses */
#define bxt__sampler__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Sampler: L3 Shader Throughput */
#define bxt__sampler__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Sampler: Shader Barrier Messages */
#define bxt__sampler__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Sampler: Slice0 Subslice1 Input Available */
#define bxt__sampler__sampler01_input_available__read bdw__render_basic__sampler0_busy__read

/* Sampler: Slice0 Subslice2 Input Available */
#define bxt__sampler__sampler02_input_available__read bdw__render_basic__sampler1_busy__read

/* Sampler: Slice0 Subslice0 Input Available */
#define bxt__sampler__sampler00_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler: Slice0 Subslice2 Sampler Output Ready */
#define bxt__sampler__sampler02_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* Sampler: Slice0 Subslice0 Sampler Output Ready */
#define bxt__sampler__sampler00_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Sampler: Slice0 Subslice1 Sampler Output Ready */
#define bxt__sampler__sampler01_output_ready__read bdw__render_pipe_profile__sf_stall__read

/* Sampler: SQ is full */
#define bxt__sampler__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "TDL_1" metric set.
 *
 * Nothing is implemented in this run: every bxt__tdl_1__* identifier is a
 * preprocessor alias that expands to a helper carrying an hsw__ or bdw__
 * prefix.  The target's name does not always match the counter it serves
 * (e.g. ps_thread00_ready_for_dispatch expands to a sampler1_busy helper);
 * presumably the generator reuses helpers whose equations are identical --
 * confirm against the generator before changing a mapping by hand.
 */

/* TDL_1: GPU Time Elapsed */
#define bxt__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_1: GPU Core Clocks */
#define bxt__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_1: AVG GPU Core Frequency (__read) */
#define bxt__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1: AVG GPU Core Frequency (__max) */
#define bxt__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1: GPU Busy */
#define bxt__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_1: VS Threads Dispatched */
#define bxt__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_1: HS Threads Dispatched */
#define bxt__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_1: DS Threads Dispatched */
#define bxt__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_1: GS Threads Dispatched */
#define bxt__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_1: FS Threads Dispatched */
#define bxt__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_1: CS Threads Dispatched */
#define bxt__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_1: EU Active */
#define bxt__tdl_1__eu_active__read bdw__render_basic__eu_active__read

/* TDL_1: EU Stall */
#define bxt__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_1: EU Both FPU Pipes Active */
#define bxt__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_1: VS FPU0 Pipe Active */
#define bxt__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_1: VS FPU1 Pipe Active */
#define bxt__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_1: VS Send Pipe Active */
#define bxt__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_1: PS FPU0 Pipe Active */
#define bxt__tdl_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_1: PS FPU1 Pipe Active */
#define bxt__tdl_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_1: PS Send Pipeline Active */
#define bxt__tdl_1__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_1: FS Both FPU Active */
#define bxt__tdl_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_1: Rasterized Pixels */
#define bxt__tdl_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_1: Early Hi-Depth Test Fails */
#define bxt__tdl_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_1: Early Depth Test Fails */
#define bxt__tdl_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_1: Samples Killed in FS */
#define bxt__tdl_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_1: Pixels Failing Tests */
#define bxt__tdl_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_1: Samples Written */
#define bxt__tdl_1__samples_written__read bdw__render_basic__samples_written__read

/* TDL_1: Samples Blended */
#define bxt__tdl_1__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_1: Sampler Texels */
#define bxt__tdl_1__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_1: Sampler Texels Misses */
#define bxt__tdl_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* TDL_1: SLM Bytes Read */
#define bxt__tdl_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* TDL_1: SLM Bytes Written */
#define bxt__tdl_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* TDL_1: Shader Memory Accesses */
#define bxt__tdl_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* TDL_1: Shader Atomic Memory Accesses */
#define bxt__tdl_1__shader_atomics__read bdw__render_basic__shader_atomics__read

/* TDL_1: L3 Shader Throughput */
#define bxt__tdl_1__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* TDL_1: Shader Barrier Messages */
#define bxt__tdl_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define bxt__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define bxt__tdl_1__ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define bxt__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define bxt__tdl_1__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_1: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define bxt__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define bxt__tdl_1__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_1: SQ is full */
#define bxt__tdl_1__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "TDL_2" metric set.
 *
 * Nothing is implemented in this run: every bxt__tdl_2__* identifier is a
 * preprocessor alias that expands to a helper carrying an hsw__ or bdw__
 * prefix.  The target's name does not always match the counter it serves
 * (e.g. thread_header01_ready_port1 expands to a ds_stall helper);
 * presumably the generator reuses helpers whose equations are identical --
 * confirm against the generator before changing a mapping by hand.
 */

/* TDL_2: GPU Time Elapsed */
#define bxt__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_2: GPU Core Clocks */
#define bxt__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_2: AVG GPU Core Frequency (__read) */
#define bxt__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_2: AVG GPU Core Frequency (__max) */
#define bxt__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_2: GPU Busy */
#define bxt__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_2: VS Threads Dispatched */
#define bxt__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_2: HS Threads Dispatched */
#define bxt__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_2: DS Threads Dispatched */
#define bxt__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_2: GS Threads Dispatched */
#define bxt__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_2: FS Threads Dispatched */
#define bxt__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_2: CS Threads Dispatched */
#define bxt__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_2: EU Active */
#define bxt__tdl_2__eu_active__read bdw__render_basic__eu_active__read

/* TDL_2: EU Stall */
#define bxt__tdl_2__eu_stall__read bdw__render_basic__eu_stall__read

/* TDL_2: EU Both FPU Pipes Active */
#define bxt__tdl_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* TDL_2: VS FPU0 Pipe Active */
#define bxt__tdl_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* TDL_2: VS FPU1 Pipe Active */
#define bxt__tdl_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* TDL_2: VS Send Pipe Active */
#define bxt__tdl_2__vs_send_active__read bdw__render_basic__vs_send_active__read

/* TDL_2: PS FPU0 Pipe Active */
#define bxt__tdl_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* TDL_2: PS FPU1 Pipe Active */
#define bxt__tdl_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* TDL_2: PS Send Pipeline Active */
#define bxt__tdl_2__ps_send_active__read bdw__render_basic__ps_send_active__read

/* TDL_2: FS Both FPU Active */
#define bxt__tdl_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* TDL_2: Rasterized Pixels */
#define bxt__tdl_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* TDL_2: Early Hi-Depth Test Fails */
#define bxt__tdl_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* TDL_2: Early Depth Test Fails */
#define bxt__tdl_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* TDL_2: Samples Killed in FS */
#define bxt__tdl_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* TDL_2: Pixels Failing Tests */
#define bxt__tdl_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* TDL_2: Samples Written */
#define bxt__tdl_2__samples_written__read bdw__render_basic__samples_written__read

/* TDL_2: Samples Blended */
#define bxt__tdl_2__samples_blended__read bdw__render_basic__samples_blended__read

/* TDL_2: Sampler Texels */
#define bxt__tdl_2__sampler_texels__read bdw__render_basic__sampler_texels__read

/* TDL_2: Sampler Texels Misses */
#define bxt__tdl_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* TDL_2: SLM Bytes Read */
#define bxt__tdl_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* TDL_2: SLM Bytes Written */
#define bxt__tdl_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* TDL_2: Shader Memory Accesses */
#define bxt__tdl_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* TDL_2: Shader Atomic Memory Accesses */
#define bxt__tdl_2__shader_atomics__read bdw__render_basic__shader_atomics__read

/* TDL_2: L3 Shader Throughput */
#define bxt__tdl_2__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* TDL_2: Shader Barrier Messages */
#define bxt__tdl_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* TDL_2: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define bxt__tdl_2__thread_header01_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_2: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define bxt__tdl_2__thread_header00_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_2: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define bxt__tdl_2__thread_header00_ready_port0__read bdw__render_pipe_profile__sf_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define bxt__tdl_2__thread_header02_ready_port1__read bdw__render_pipe_profile__cl_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define bxt__tdl_2__thread_header02_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* TDL_2: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define bxt__tdl_2__thread_header01_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* TDL_2: SQ is full */
#define bxt__tdl_2__gt_request_queue_full__read bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * bxt "Compute Metrics Extra" set.
 *
 * Every bxt__compute_extra__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__, bdw__ or sklgt2__ prefix.  Note
 * that fpu1_active expands to bdw__render_basic__eu_stall__read; presumably
 * the generator reuses helpers whose equations are identical -- confirm
 * against the generator before changing a mapping by hand.
 */

/* Compute Metrics Extra: GPU Time Elapsed */
#define bxt__compute_extra__gpu_time__read hsw__render_basic__gpu_time__read

/* Compute Metrics Extra: GPU Core Clocks */
#define bxt__compute_extra__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extra: AVG GPU Core Frequency (__read) */
#define bxt__compute_extra__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extra: AVG GPU Core Frequency (__max) */
#define bxt__compute_extra__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extra: EU FPU1 Pipe Active */
#define bxt__compute_extra__fpu1_active__read bdw__render_basic__eu_stall__read

/* Compute Metrics Extra: EU FPU1 Pipe Active including Ext Math */
#define bxt__compute_extra__fpu1_active_adjusted__read sklgt2__compute_extra__fpu1_active_adjusted__read

/*
 * bxt "Gpu Rings Busyness" metrics set.
 *
 * Every bxt__gpu_busyness__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__ or bdw__ prefix.  The ring-busy
 * counters expand to render_pipe_profile / render_basic helpers whose names
 * refer to other counters; presumably the generator reuses helpers whose
 * equations are identical -- confirm against the generator before changing
 * a mapping by hand.
 */

/* Gpu Rings Busyness: GPU Time Elapsed */
#define bxt__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness: GPU Core Clocks */
#define bxt__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness: AVG GPU Core Frequency (__read) */
#define bxt__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness: AVG GPU Core Frequency (__max) */
#define bxt__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness: Render Ring Busy */
#define bxt__gpu_busyness__render_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness: Vdbox0 Ring Busy */
#define bxt__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness: Vebox Ring Busy */
#define bxt__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness: Blitter Ring Busy */
#define bxt__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness: AnyRingBusy */
#define bxt__gpu_busyness__any_ring_busy__read bdw__render_basic__sampler0_busy__read

/*
 * bxt "MDAPI testing" set.
 *
 * Every bxt__test_oa__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__ or bdw__ prefix.  TestCounter0..8
 * expand to hsw__compute_extended / hsw__memory_reads helpers; presumably
 * the generator reuses helpers whose equations are identical -- confirm
 * against the generator before changing a mapping by hand.
 */

/* MDAPI testing: GPU Time Elapsed */
#define bxt__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* MDAPI testing: GPU Core Clocks */
#define bxt__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing: AVG GPU Core Frequency (__read) */
#define bxt__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing: AVG GPU Core Frequency (__max) */
#define bxt__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing: TestCounter0 */
#define bxt__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing: TestCounter1 */
#define bxt__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing: TestCounter2 */
#define bxt__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing: TestCounter3 */
#define bxt__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing: TestCounter4 */
#define bxt__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing: TestCounter5 */
#define bxt__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing: TestCounter6 */
#define bxt__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing: TestCounter7 */
#define bxt__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* MDAPI testing: TestCounter8 */
#define bxt__test_oa__counter8__read hsw__memory_reads__gpu_core_clocks__read

/*
 * bxt "PMA Stall" metric set.
 *
 * Every bxt__pma__stall__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__, bdw__ or sklgt2__ prefix; nothing
 * is implemented in this run.
 */

/* PMA Stall: GPU Time Elapsed */
#define bxt__pma__stall__gpu_time__read hsw__render_basic__gpu_time__read

/* PMA Stall: GPU Core Clocks */
#define bxt__pma__stall__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* PMA Stall: AVG GPU Core Frequency (__read) */
#define bxt__pma__stall__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* PMA Stall: AVG GPU Core Frequency (__max) */
#define bxt__pma__stall__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* PMA Stall: STC PMA stall */
#define bxt__pma__stall__stc_pma_stall__read sklgt2__pma__stall__stc_pma_stall__read

/*
 * bxt "AsyncCompute" metrics set.
 *
 * Every bxt__async_compute__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__, bdw__ or sklgt2__ prefix.  Several
 * FPU counters expand to render_basic helpers named after a different
 * counter (e.g. ps_fpu0_active expands to
 * bdw__render_basic__vs_fpu1_active__read); presumably the generator reuses
 * helpers whose equations are identical -- confirm against the generator
 * before changing a mapping by hand.
 */

/* AsyncCompute: GPU Time Elapsed */
#define bxt__async_compute__gpu_time__read hsw__render_basic__gpu_time__read

/* AsyncCompute: GPU Core Clocks */
#define bxt__async_compute__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AsyncCompute: AVG GPU Core Frequency (__read) */
#define bxt__async_compute__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AsyncCompute: AVG GPU Core Frequency (__max) */
#define bxt__async_compute__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* AsyncCompute: GPU Busy */
#define bxt__async_compute__gpu_busy__read bdw__render_basic__gpu_busy__read

/* AsyncCompute: VS Threads Dispatched */
#define bxt__async_compute__vs_threads__read bdw__render_basic__vs_threads__read

/* AsyncCompute: HS Threads Dispatched */
#define bxt__async_compute__hs_threads__read bdw__render_basic__hs_threads__read

/* AsyncCompute: DS Threads Dispatched */
#define bxt__async_compute__ds_threads__read bdw__render_basic__ds_threads__read

/* AsyncCompute: GS Threads Dispatched */
#define bxt__async_compute__gs_threads__read hsw__render_basic__vs_threads__read

/* AsyncCompute: FS Threads Dispatched */
#define bxt__async_compute__ps_threads__read bdw__render_basic__ps_threads__read

/* AsyncCompute: CS Threads Dispatched */
#define bxt__async_compute__cs_threads__read bdw__render_basic__cs_threads__read

/* AsyncCompute: EU FPU0 Pipe Active */
#define bxt__async_compute__fpu0_active__read bdw__render_basic__eu_fpu_both_active__read

/* AsyncCompute: VS FPU0 Pipe Active */
#define bxt__async_compute__vs_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* AsyncCompute: PS FPU0 Pipe Active */
#define bxt__async_compute__ps_fpu0_active__read bdw__render_basic__vs_fpu1_active__read

/* AsyncCompute: CS FPU0 Pipe Active */
#define bxt__async_compute__cs_fpu0_active__read bdw__render_basic__ps_fpu0_active__read

/* AsyncCompute: EU FPU1 Pipe Active */
#define bxt__async_compute__fpu1_active__read bdw__render_basic__vs_fpu0_active__read

/* AsyncCompute: VS FPU1 Pipe Active */
#define bxt__async_compute__vs_fpu1_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* AsyncCompute: PS FPU1 Pipe Active */
#define bxt__async_compute__ps_fpu1_active__read bdw__render_basic__vs_send_active__read

/* AsyncCompute: CS FPU1 Pipe Active */
#define bxt__async_compute__cs_fpu1_active__read bdw__render_basic__ps_fpu1_active__read

/* AsyncCompute: EU Thread Occupancy */
#define bxt__async_compute__eu_thread_occupancy__read sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* AsyncCompute: EU Active */
#define bxt__async_compute__eu_active__read bdw__render_basic__eu_active__read

/* AsyncCompute: EU Stall */
#define bxt__async_compute__eu_stall__read bdw__render_basic__eu_stall__read

/*
 * glk "Render Metrics Basic" set, first part.
 *
 * Every glk__render_basic__* identifier below is a preprocessor alias that
 * expands to a helper carrying an hsw__ or bdw__ prefix; nothing is
 * implemented in this run.  Note that gs_threads expands to
 * hsw__render_basic__vs_threads__read; presumably the generator reuses
 * helpers whose equations are identical -- confirm against the generator
 * before changing a mapping by hand.
 */

/* Render Metrics Basic: GPU Time Elapsed */
#define glk__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* Render Metrics Basic: GPU Core Clocks */
#define glk__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic: AVG GPU Core Frequency (__read) */
#define glk__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic: AVG GPU Core Frequency (__max) */
#define glk__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic: VS Threads Dispatched */
#define glk__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* Render Metrics Basic: HS Threads Dispatched */
#define glk__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* Render Metrics Basic: DS Threads Dispatched */
#define glk__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* Render Metrics Basic: GS Threads Dispatched */
#define glk__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* Render Metrics Basic: FS Threads Dispatched */
#define glk__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* Render Metrics Basic: CS Threads Dispatched */
#define glk__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Metrics Basic: GPU Busy */
#define glk__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Render Metrics Basic: EU Active */
#define glk__render_basic__eu_active__read bdw__render_basic__eu_active__read

/* Render Metrics Basic: EU Stall */
#define glk__render_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* Render Metrics Basic: EU Both FPU Pipes Active */
#define glk__render_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic: VS FPU0 Pipe Active */
#define glk__render_basic__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic: VS FPU1 Pipe Active */
#define glk__render_basic__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic: VS Send Pipe Active */
#define glk__render_basic__vs_send_active__read bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define glk__render_basic__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define glk__render_basic__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define glk__render_basic__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Render Metrics Basic set :: FS Both FPU Active */
#define glk__render_basic__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Render Metrics Basic set :: Sampler 0 Busy */
#define glk__render_basic__sampler0_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler 1 Busy */
#define glk__render_basic__sampler1_busy__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define glk__render_basic__samplers_busy__read \
   bdw__render_basic__samplers_busy__read

/* Render Metrics Basic set :: Sampler 0 Bottleneck */
#define glk__render_basic__sampler0_bottleneck__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics Basic set :: Sampler 1 Bottleneck */
#define glk__render_basic__sampler1_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define glk__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define glk__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define glk__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define glk__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define glk__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define glk__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define glk__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define glk__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define glk__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses */
#define glk__render_basic__sampler_l1_misses__read \
   sklgt2__render_basic__sampler_l1_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define glk__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define glk__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define glk__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define glk__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Lookup Accesses w/o IC */
#define glk__render_basic__l3_lookups__read \
   sklgt2__render_basic__l3_lookups__read

/* Render Metrics Basic set :: L3 Misses */
#define glk__render_basic__l3_misses__read \
   hsw__compute_extended__typed_atomics0__read

/* Render Metrics Basic set :: L3 Sampler Throughput */
#define glk__render_basic__l3_sampler_throughput__read \
   sklgt2__render_basic__l3_sampler_throughput__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define glk__render_basic__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define glk__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Fixed Pipe Throughput */
#define glk__render_basic__gti_vf_throughput__read \
   bdw__render_basic__gti_vf_throughput__read

/* Render Metrics Basic set :: GTI Depth Throughput */
#define glk__render_basic__gti_depth_throughput__read \
   bdw__render_basic__gti_depth_throughput__read

/* Render Metrics Basic set :: GTI RCC Throughput */
#define glk__render_basic__gti_rcc_throughput__read \
   bdw__render_basic__gti_rcc_throughput__read

/* Render Metrics Basic set :: GTI L3 Throughput */
#define glk__render_basic__gti_l3_throughput__read \
   hsw__render_basic__gti_l3_throughput__read

/* Render Metrics Basic set :: GTI HDC TLB Lookup Throughput */
#define glk__render_basic__gti_hdc_lookups_throughput__read \
   bdw__render_basic__gti_hdc_lookups_throughput__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define glk__render_basic__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define glk__render_basic__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define glk__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler_bottleneck__read

/*
 * Compute Metrics Basic set (glk) -- counter accessor aliases.
 *
 * Each macro below only renames: a glk__compute_basic__* accessor name is
 * mapped onto another accessor name, and no code is emitted here.
 *
 * NOTE(review): some targets carry a different metric name than the alias
 * (e.g. Typed Bytes Read -> hsw untyped_bytes_read). Presumably the generator
 * reuses an earlier accessor with an identical equation -- confirm against
 * the generator before treating these as mismatches.
 */

/* GPU Time Elapsed */
#define glk__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define glk__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define glk__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define glk__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define glk__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define glk__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define glk__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define glk__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define glk__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define glk__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define glk__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define glk__compute_basic__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define glk__compute_basic__eu_stall__read bdw__render_basic__eu_stall__read

/* EU Both FPU Pipes Active */
#define glk__compute_basic__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* EU FPU0 Pipe Active */
#define glk__compute_basic__fpu0_active__read bdw__render_basic__vs_fpu0_active__read

/* EU FPU1 Pipe Active */
#define glk__compute_basic__fpu1_active__read bdw__render_basic__vs_fpu1_active__read

/* EU AVG IPC Rate */
#define glk__compute_basic__eu_avg_ipc_rate__read bdw__compute_basic__eu_avg_ipc_rate__read

/* EU Send Pipe Active */
#define glk__compute_basic__eu_send_active__read bdw__render_basic__vs_send_active__read

/* EU Thread Occupancy */
#define glk__compute_basic__eu_thread_occupancy__read bdw__compute_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define glk__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define glk__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define glk__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define glk__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define glk__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define glk__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define glk__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define glk__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define glk__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define glk__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define glk__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define glk__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define glk__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define glk__compute_basic__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define glk__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Typed Bytes Read */
#define glk__compute_basic__typed_bytes_read__read hsw__compute_basic__untyped_bytes_read__read

/* Typed Bytes Written */
#define glk__compute_basic__typed_bytes_written__read bxt__compute_basic__typed_bytes_written__read

/* Untyped Bytes Read */
#define glk__compute_basic__untyped_bytes_read__read hsw__compute_basic__typed_bytes_written__read

/* Untyped Writes */
#define glk__compute_basic__untyped_bytes_written__read bxt__compute_basic__untyped_bytes_written__read

/* GTI Read Throughput */
#define glk__compute_basic__gti_read_throughput__read hsw__render_basic__gti_l3_throughput__read

/* GTI Write Throughput */
#define glk__compute_basic__gti_write_throughput__read bdw__render_basic__gti_hdc_lookups_throughput__read

/*
 * Render Metrics set for 3D Pipeline Profile (glk) -- counter accessor
 * aliases.
 *
 * Each macro below only renames: a glk__render_pipe_profile__* accessor name
 * is mapped onto another accessor name, and no code is emitted here.
 *
 * NOTE(review): some targets carry a different metric name than the alias
 * (e.g. VS Bottleneck -> bdw render_basic sampler0_busy). Presumably the
 * generator reuses an earlier accessor with an identical equation -- confirm
 * against the generator before treating these as mismatches.
 */

/* GPU Time Elapsed */
#define glk__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define glk__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define glk__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define glk__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define glk__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define glk__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define glk__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define glk__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define glk__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define glk__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define glk__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define glk__render_pipe_profile__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define glk__render_pipe_profile__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define glk__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define glk__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define glk__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define glk__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define glk__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define glk__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define glk__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define glk__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define glk__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define glk__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define glk__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define glk__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define glk__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define glk__render_pipe_profile__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define glk__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck */
#define glk__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__vf_bottleneck__read

/* VS Bottleneck */
#define glk__render_pipe_profile__vs_bottleneck__read bdw__render_basic__sampler0_busy__read

/* HS Bottleneck */
#define glk__render_pipe_profile__hs_bottleneck__read bdw__render_basic__sampler1_busy__read

/* DS Bottleneck */
#define glk__render_pipe_profile__ds_bottleneck__read bdw__render_basic__sampler0_bottleneck__read

/* GS Bottleneck */
#define glk__render_pipe_profile__gs_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* SO Bottleneck */
#define glk__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* Clipper Bottleneck */
#define glk__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* Strip-Fans Bottleneck */
#define glk__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__sf_bottleneck__read

/* Hi-Depth Bottleneck */
#define glk__render_pipe_profile__hi_depth_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Early Depth Bottleneck */
#define glk__render_pipe_profile__early_depth_bottleneck__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* BC Bottleneck */
#define glk__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* HS Stall */
#define glk__render_pipe_profile__hs_stall__read bdw__render_pipe_profile__hs_stall__read

/* DS Stall */
#define glk__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__ds_stall__read

/* SO Stall */
#define glk__render_pipe_profile__so_stall__read bdw__render_pipe_profile__so_stall__read

/* CL Stall */
#define glk__render_pipe_profile__cl_stall__read bdw__render_pipe_profile__cl_stall__read

/* SF Stall */
#define glk__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_stall__read

/*
 * Memory Reads Distribution metrics set (glk) -- counter accessor aliases.
 *
 * Each macro below only renames: a glk__memory_reads__* accessor name is
 * mapped onto another accessor name, and no code is emitted here.
 *
 * NOTE(review): some targets carry a different metric name than the alias
 * (e.g. GtiRczMemoryReads -> hsw compute_extended gpu_clocks). Presumably the
 * generator reuses an earlier accessor with an identical equation -- confirm
 * against the generator before treating these as mismatches.
 */

/* GPU Time Elapsed */
#define glk__memory_reads__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define glk__memory_reads__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define glk__memory_reads__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define glk__memory_reads__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define glk__memory_reads__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define glk__memory_reads__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define glk__memory_reads__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define glk__memory_reads__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define glk__memory_reads__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define glk__memory_reads__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define glk__memory_reads__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define glk__memory_reads__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define glk__memory_reads__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define glk__memory_reads__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define glk__memory_reads__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define glk__memory_reads__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define glk__memory_reads__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define glk__memory_reads__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define glk__memory_reads__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define glk__memory_reads__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define glk__memory_reads__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define glk__memory_reads__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define glk__memory_reads__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define glk__memory_reads__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define glk__memory_reads__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define glk__memory_reads__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Shader Barrier Messages */
#define glk__memory_reads__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryReads */
#define glk__memory_reads__gti_cmd_streamer_memory_reads__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiRsMemoryReads */
#define glk__memory_reads__gti_rs_memory_reads__read hsw__compute_extended__eu_typed_reads0__read

/* GtiVfMemoryReads */
#define glk__memory_reads__gti_vf_memory_reads__read hsw__compute_extended__eu_untyped_writes0__read

/* GtiRccMemoryReads */
#define glk__memory_reads__gti_rcc_memory_reads__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryReads */
#define glk__memory_reads__gti_msc_memory_reads__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryReads */
#define glk__memory_reads__gti_hiz_memory_reads__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryReads */
#define glk__memory_reads__gti_stc_memory_reads__read hsw__compute_extended__eu_urb_atomics0__read

/* GtiRczMemoryReads */
#define glk__memory_reads__gti_rcz_memory_reads__read hsw__compute_extended__gpu_clocks__read

/* GtiMemoryReads */
#define glk__memory_reads__gti_memory_reads__read hsw__compute_extended__typed_writes0__read

/* GtiL3Bank0Reads */
#define glk__memory_reads__gti_l3_bank0_reads__read hsw__compute_extended__typed_atomics0__read

/* GtiL3Bank1Reads */
#define glk__memory_reads__gti_l3_bank1_reads__read hsw__memory_reads__gti_memory_reads__read

/* GtiL3Bank2Reads */
#define glk__memory_reads__gti_l3_bank2_reads__read hsw__memory_reads__llc_read_accesses__read

/* GtiL3Bank3Reads */
#define glk__memory_reads__gti_l3_bank3_reads__read hsw__memory_reads__gpu_core_clocks__read

/* GtiL3Reads */
#define glk__memory_reads__gti_l3_reads__read bdw__memory_reads__gti_l3_reads__read

/* GtiRingAccesses */
#define glk__memory_reads__gti_ring_accesses__read bdw__memory_reads__gti_ring_accesses__read

/*
 * Memory Writes Distribution metrics set (glk) -- counter accessor aliases.
 *
 * Each macro below only renames: a glk__memory_writes__* accessor name is
 * mapped onto another accessor name, and no code is emitted here.
 *
 * NOTE(review): some targets carry a different metric name than the alias
 * (e.g. GtiSoMemoryWrites -> hsw compute_extended eu_typed_reads0).
 * Presumably the generator reuses an earlier accessor with an identical
 * equation -- confirm against the generator before treating these as
 * mismatches.
 */

/* GPU Time Elapsed */
#define glk__memory_writes__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define glk__memory_writes__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define glk__memory_writes__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define glk__memory_writes__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define glk__memory_writes__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define glk__memory_writes__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define glk__memory_writes__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define glk__memory_writes__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define glk__memory_writes__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define glk__memory_writes__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define glk__memory_writes__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define glk__memory_writes__eu_active__read bdw__render_basic__eu_active__read

/* EU Stall */
#define glk__memory_writes__eu_stall__read bdw__render_basic__eu_stall__read

/* Rasterized Pixels */
#define glk__memory_writes__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define glk__memory_writes__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define glk__memory_writes__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define glk__memory_writes__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define glk__memory_writes__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define glk__memory_writes__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define glk__memory_writes__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define glk__memory_writes__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define glk__memory_writes__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define glk__memory_writes__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define glk__memory_writes__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define glk__memory_writes__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define glk__memory_writes__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define glk__memory_writes__l3_shader_throughput__read bdw__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define glk__memory_writes__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GtiCmdStreamerMemoryWrites */
#define glk__memory_writes__gti_cmd_streamer_memory_writes__read hsw__compute_extended__eu_untyped_reads0__read

/* GtiSoMemoryWrites */
#define glk__memory_writes__gti_so_memory_writes__read hsw__compute_extended__eu_typed_reads0__read

/* GtiRccMemoryWrites */
#define glk__memory_writes__gti_rcc_memory_writes__read hsw__compute_extended__eu_typed_writes0__read

/* GtiMscMemoryWrites */
#define glk__memory_writes__gti_msc_memory_writes__read hsw__compute_extended__eu_untyped_atomics0__read

/* GtiHizMemoryWrites */
#define glk__memory_writes__gti_hiz_memory_writes__read hsw__compute_extended__eu_typed_atomics0__read

/* GtiStcMemoryWrites */
#define glk__memory_writes__gti_stc_memory_writes__read hsw__compute_extended__eu_urb_atomics0__read

/* Memory Writes Distribution metrics set :: GtiRczMemoryWrites */
#define glk__memory_writes__gti_rcz_memory_writes__read \
   hsw__compute_extended__gpu_clocks__read

/* Memory Writes Distribution metrics set :: GtiMemoryWrites */
#define glk__memory_writes__gti_memory_writes__read \
   hsw__compute_extended__typed_writes0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank0Writes */
#define glk__memory_writes__gti_l3_bank0_writes__read \
   hsw__compute_extended__typed_atomics0__read

/* Memory Writes Distribution metrics set :: GtiL3Bank1Writes */
#define glk__memory_writes__gti_l3_bank1_writes__read \
   hsw__memory_reads__gti_memory_reads__read

/* Memory Writes Distribution metrics set :: GtiL3Bank2Writes */
#define glk__memory_writes__gti_l3_bank2_writes__read \
   hsw__memory_reads__llc_read_accesses__read

/* Memory Writes Distribution metrics set :: GtiL3Bank3Writes */
#define glk__memory_writes__gti_l3_bank3_writes__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Memory Writes Distribution metrics set :: GtiL3Writes */
#define glk__memory_writes__gti_l3_writes__read \
   bdw__memory_reads__gti_l3_reads__read

/* Memory Writes Distribution metrics set :: GtiRingAccesses */
#define glk__memory_writes__gti_ring_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/*
 * glk "Compute Metrics Extended" set.
 *
 * Every entry in this group is an object-like macro: the glk__compute_extended__*
 * name expands to the identifier of a reader carrying another platform
 * prefix (hsw or bdw), so no function body is defined here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * in the comment (e.g. shader_barriers -> early_depth_test_fails,
 * typed_reads0 -> hsw__render_basic__gpu_core_clocks). Presumably the
 * generator reuses readers with identical equations -- confirm before
 * editing a target by hand.
 */

/* Compute Metrics Extended set :: GPU Time Elapsed */
#define glk__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extended set :: GPU Core Clocks */
#define glk__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency */
#define glk__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extended set :: AVG GPU Core Frequency (max) */
#define glk__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extended set :: CS Threads Dispatched */
#define glk__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Extended set :: EU Active */
#define glk__compute_extended__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Extended set :: EU Stall */
#define glk__compute_extended__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extended set :: EU Both FPU Pipes Active */
#define glk__compute_extended__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Extended set :: EU FPU0 Pipe Active */
#define glk__compute_extended__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Extended set :: EU FPU1 Pipe Active */
#define glk__compute_extended__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Extended set :: EU AVG IPC Rate */
#define glk__compute_extended__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Extended set :: EU Send Pipe Active */
#define glk__compute_extended__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Extended set :: EU Thread Occupancy */
#define glk__compute_extended__eu_thread_occupancy__read \
   bdw__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Extended set :: Sampler Texels */
#define glk__compute_extended__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Extended set :: Sampler Texels Misses */
#define glk__compute_extended__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Extended set :: SLM Bytes Read */
#define glk__compute_extended__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Extended set :: SLM Bytes Written */
#define glk__compute_extended__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Extended set :: Shader Memory Accesses */
#define glk__compute_extended__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Extended set :: Shader Atomic Memory Accesses */
#define glk__compute_extended__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Extended set :: L3 Shader Throughput */
#define glk__compute_extended__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics Extended set :: Shader Barrier Messages */
#define glk__compute_extended__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Extended set :: EuUntypedReads0 */
#define glk__compute_extended__eu_untyped_reads0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Metrics Extended set :: EuTypedReads0 */
#define glk__compute_extended__eu_typed_reads0__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Metrics Extended set :: EuUntypedWrites0 */
#define glk__compute_extended__eu_untyped_writes0__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Compute Metrics Extended set :: EuTypedWrites0 */
#define glk__compute_extended__eu_typed_writes0__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Metrics Extended set :: EuUntypedAtomics0 */
#define glk__compute_extended__eu_untyped_atomics0__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Metrics Extended set :: EuTypedAtomics0 */
#define glk__compute_extended__eu_typed_atomics0__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedReads0 */
#define glk__compute_extended__eu_a64_untyped_reads0__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Compute Metrics Extended set :: EuA64UntypedWrites0 */
#define glk__compute_extended__eu_a64_untyped_writes0__read \
   hsw__compute_extended__gpu_clocks__read

/* Compute Metrics Extended set :: Typed Reads 0 */
#define glk__compute_extended__typed_reads0__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extended set :: Typed Writes 0 */
#define glk__compute_extended__typed_writes0__read \
   hsw__compute_extended__typed_writes0__read

/* Compute Metrics Extended set :: Untyped Reads 0 */
#define glk__compute_extended__untyped_reads0__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Metrics Extended set :: Untyped Writes 0 */
#define glk__compute_extended__untyped_writes0__read \
   hsw__compute_extended__untyped_writes0__read

/* Compute Metrics Extended set :: Typed Atomics 0 */
#define glk__compute_extended__typed_atomics0__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Metrics Extended set :: TypedReadsPerCacheLine */
#define glk__compute_extended__typed_reads_per_cache_line__read \
   hsw__compute_extended__typed_reads_per_cache_line__read

/* Compute Metrics Extended set :: TypedWritesPerCacheLine */
#define glk__compute_extended__typed_writes_per_cache_line__read \
   hsw__compute_extended__typed_writes_per_cache_line__read

/* Compute Metrics Extended set :: UntypedReadsPerCacheLine */
#define glk__compute_extended__untyped_reads_per_cache_line__read \
   bdw__compute_extended__untyped_reads_per_cache_line__read

/* Compute Metrics Extended set :: UntypedWritesPerCacheLine */
#define glk__compute_extended__untyped_writes_per_cache_line__read \
   bdw__compute_extended__untyped_writes_per_cache_line__read

/* Compute Metrics Extended set :: TypedAtomicsPerCacheLine */
#define glk__compute_extended__typed_atomics_per_cache_line__read \
   hsw__compute_extended__typed_atomics_per_cache_line__read

/*
 * glk "Compute Metrics L3 Cache" set.
 *
 * Every entry in this group is an object-like macro: the glk__compute_l3_cache__*
 * name expands to the identifier of a reader carrying another platform
 * prefix (hsw, bdw, chv or sklgt2), so no function body is defined here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * in the comment (e.g. gs_threads -> hsw__render_basic__vs_threads,
 * eu_ternary_fpu0_instruction -> bdw__render_basic__ps_fpu0_active).
 * Presumably the generator reuses readers with identical equations --
 * confirm before editing a target by hand.
 */

/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define glk__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define glk__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency */
#define glk__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (max) */
#define glk__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define glk__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define glk__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define glk__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define glk__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define glk__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define glk__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define glk__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define glk__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define glk__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define glk__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define glk__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define glk__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define glk__compute_l3_cache__eu_avg_ipc_rate__read \
   bdw__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define glk__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define glk__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define glk__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define glk__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define glk__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define glk__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define glk__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define glk__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define glk__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define glk__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define glk__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define glk__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define glk__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define glk__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define glk__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define glk__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define glk__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define glk__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define glk__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define glk__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define glk__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define glk__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define glk__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define glk__compute_l3_cache__l3_accesses__read \
   sklgt2__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Misses */
#define glk__compute_l3_cache__l3_misses__read \
   bdw__compute_l3_cache__l3_misses__read

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput */
#define glk__compute_l3_cache__l3_sampler_throughput__read \
   chv__render_basic__l3_sampler_throughput__read

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define glk__compute_l3_cache__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define glk__compute_l3_cache__l3_total_throughput__read \
   sklgt2__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define glk__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 Accesses */
#define glk__compute_l3_cache__l3_bank00_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 01 Accesses */
#define glk__compute_l3_cache__l3_bank01_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 02 Accesses */
#define glk__compute_l3_cache__l3_bank02_accesses__read \
   bdw__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 03 Accesses */
#define glk__compute_l3_cache__l3_bank03_accesses__read \
   bdw__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Accesses */
#define glk__compute_l3_cache__l3_bank00_ic_accesses__read \
   bdw__compute_l3_cache__l3_bank00_ic_accesses__read

/* Compute Metrics L3 Cache set :: L3 Bank 00 IC Hits */
#define glk__compute_l3_cache__l3_bank00_ic_hits__read \
   bdw__compute_l3_cache__l3_bank00_ic_hits__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput */
#define glk__compute_l3_cache__gti_l3_throughput__read \
   bdw__compute_l3_cache__gti_l3_throughput__read

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define glk__compute_l3_cache__gti_read_throughput__read \
   bdw__render_basic__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define glk__compute_l3_cache__gti_write_throughput__read \
   hsw__render_basic__gti_write_throughput__read

/*
 * glk "HDCAndSF" metric set.
 *
 * Every entry in this group is an object-like macro: the glk__hdc_and_sf__*
 * name expands to the identifier of a reader carrying another platform
 * prefix (hsw or bdw), so no function body is defined here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * in the comment (e.g. poly_data_ready -> bdw__render_basic__sampler0_busy,
 * non_sampler_shader00_... -> bdw ..._shader02_...). Presumably the
 * generator reuses readers with identical equations -- confirm before
 * editing a target by hand.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define glk__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define glk__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define glk__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (max) */
#define glk__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define glk__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define glk__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define glk__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define glk__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define glk__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define glk__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define glk__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define glk__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define glk__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define glk__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define glk__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define glk__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define glk__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define glk__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define glk__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define glk__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define glk__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define glk__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define glk__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define glk__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define glk__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define glk__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define glk__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define glk__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define glk__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define glk__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define glk__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define glk__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define glk__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define glk__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define glk__hdc_and_sf__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define glk__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define glk__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss0) */
#define glk__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: HDC stalled by L3 (s0.ss1) */
#define glk__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: SQ is full */
#define glk__hdc_and_sf__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * glk "L3_1" metric set.
 *
 * Every entry in this group is an object-like macro: the glk__l3_1__* name
 * expands to the identifier of a reader carrying another platform prefix
 * (hsw or bdw), so no function body is defined here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * in the comment (e.g. l30_bank0_stalled -> bdw__render_basic__sampler0_busy,
 * l30_bank1_stalled -> bdw__render_pipe_profile__so_bottleneck). Presumably
 * the generator reuses readers with identical equations -- confirm before
 * editing a target by hand.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define glk__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define glk__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define glk__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (max) */
#define glk__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define glk__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define glk__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define glk__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define glk__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define glk__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define glk__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define glk__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define glk__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define glk__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define glk__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define glk__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define glk__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define glk__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define glk__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define glk__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define glk__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define glk__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define glk__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define glk__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define glk__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define glk__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define glk__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define glk__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define glk__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define glk__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define glk__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define glk__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define glk__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define glk__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define glk__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define glk__l3_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define glk__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Stalled */
#define glk__l3_1__l30_bank0_stalled__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Stalled */
#define glk__l3_1__l30_bank1_stalled__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define glk__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define glk__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define glk__l3_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * glk "RasterizerAndPixelBackend" metric set.
 *
 * The entries written as #define are object-like macros: the
 * glk__rasterizer_and_pixel_backend__* name expands to the identifier of a
 * reader carrying another platform prefix (hsw or bdw).
 *
 * NOTE(review): some targets are named after a different metric than the one
 * in the comment (e.g. gs_threads -> hsw__render_basic__vs_threads).
 * Presumably the generator reuses readers with identical equations --
 * confirm before editing a target by hand.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define glk__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define glk__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define glk__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (max) */
#define glk__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define glk__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define glk__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define glk__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define glk__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define glk__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define glk__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define glk__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define glk__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define glk__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define glk__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define glk__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define glk__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define glk__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define glk__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define glk__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define glk__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define glk__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define glk__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define glk__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define glk__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define glk__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define glk__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define glk__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define glk__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define glk__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define glk__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define glk__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Post-EarlyZ Pixel Data Ready */
#define glk__rasterizer_and_pixel_backend__pixel_data0_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define glk__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 PS Output Available */
#define glk__rasterizer_and_pixel_backend__ps_output0_available__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pixel Values Ready */
#define glk__rasterizer_and_pixel_backend__pixel_values0_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define glk__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define glk__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * GLK "Sampler" metric set: counter reader aliases.
 *
 * Every macro in this run gives a GLK counter its own reader name by aliasing
 * a reader named for another platform / metric set (hsw__, bdw__ prefixes).
 * The target's name does not always describe the counter it serves here,
 * e.g. "GS Threads Dispatched" aliases hsw__render_basic__vs_threads__read
 * and "Slice0 Subslice1 Input Available" aliases
 * bdw__render_basic__sampler0_busy__read.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* Metric set Sampler :: GPU Time Elapsed */
#define glk__sampler__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler :: GPU Core Clocks */
#define glk__sampler__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler :: AVG GPU Core Frequency */
#define glk__sampler__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__sampler__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler :: GPU Busy */
#define glk__sampler__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler :: VS Threads Dispatched */
#define glk__sampler__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler :: HS Threads Dispatched */
#define glk__sampler__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler :: DS Threads Dispatched */
#define glk__sampler__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler :: GS Threads Dispatched */
#define glk__sampler__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler :: FS Threads Dispatched */
#define glk__sampler__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler :: CS Threads Dispatched */
#define glk__sampler__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler :: EU Active */
#define glk__sampler__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler :: EU Stall */
#define glk__sampler__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler :: EU Both FPU Pipes Active */
#define glk__sampler__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler :: VS FPU0 Pipe Active */
#define glk__sampler__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler :: VS FPU1 Pipe Active */
#define glk__sampler__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler :: VS Send Pipe Active */
#define glk__sampler__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler :: PS FPU0 Pipe Active */
#define glk__sampler__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler :: PS FPU1 Pipe Active */
#define glk__sampler__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler :: PS Send Pipeline Active */
#define glk__sampler__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler :: FS Both FPU Active */
#define glk__sampler__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler :: Rasterized Pixels */
#define glk__sampler__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler :: Early Hi-Depth Test Fails */
#define glk__sampler__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler :: Early Depth Test Fails */
#define glk__sampler__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Samples Killed in FS */
#define glk__sampler__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler :: Pixels Failing Tests */
#define glk__sampler__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler :: Samples Written */
#define glk__sampler__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler :: Samples Blended */
#define glk__sampler__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler :: Sampler Texels */
#define glk__sampler__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler :: Sampler Texels Misses */
#define glk__sampler__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler :: SLM Bytes Read */
#define glk__sampler__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler :: SLM Bytes Written */
#define glk__sampler__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler :: Shader Memory Accesses */
#define glk__sampler__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler :: Shader Atomic Memory Accesses */
#define glk__sampler__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler :: L3 Shader Throughput */
#define glk__sampler__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set Sampler :: Shader Barrier Messages */
#define glk__sampler__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler :: Slice0 Subslice1 Input Available */
#define glk__sampler__sampler01_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set Sampler :: Slice0 Subslice2 Input Available */
#define glk__sampler__sampler02_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler :: Slice0 Subslice0 Input Available */
#define glk__sampler__sampler00_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice2 Sampler Output Ready */
#define glk__sampler__sampler02_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice0 Sampler Output Ready */
#define glk__sampler__sampler00_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler :: Slice0 Subslice1 Sampler Output Ready */
#define glk__sampler__sampler01_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler :: SQ is full */
#define glk__sampler__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * GLK "TDL_1" metric set: counter reader aliases.
 *
 * Every macro in this run gives a GLK counter its own reader name by aliasing
 * a reader named for another platform / metric set (hsw__, bdw__ prefixes).
 * The target's name does not always describe the counter it serves here,
 * e.g. "GS Threads Dispatched" aliases hsw__render_basic__vs_threads__read
 * and "NonPS Thread Ready For Dispatch on Slice0 Subslice1" aliases
 * bdw__render_basic__sampler0_busy__read.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* Metric set TDL_1 :: GPU Time Elapsed */
#define glk__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_1 :: GPU Core Clocks */
#define glk__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_1 :: AVG GPU Core Frequency */
#define glk__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_1 :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_1 :: GPU Busy */
#define glk__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_1 :: VS Threads Dispatched */
#define glk__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_1 :: HS Threads Dispatched */
#define glk__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_1 :: DS Threads Dispatched */
#define glk__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_1 :: GS Threads Dispatched */
#define glk__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_1 :: FS Threads Dispatched */
#define glk__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_1 :: CS Threads Dispatched */
#define glk__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_1 :: EU Active */
#define glk__tdl_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_1 :: EU Stall */
#define glk__tdl_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_1 :: EU Both FPU Pipes Active */
#define glk__tdl_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_1 :: VS FPU0 Pipe Active */
#define glk__tdl_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_1 :: VS FPU1 Pipe Active */
#define glk__tdl_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_1 :: VS Send Pipe Active */
#define glk__tdl_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define glk__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define glk__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define glk__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define glk__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: Rasterized Pixels */
#define glk__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define glk__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define glk__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define glk__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define glk__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define glk__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define glk__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define glk__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define glk__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define glk__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define glk__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define glk__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define glk__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define glk__tdl_1__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define glk__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice1 */
#define glk__tdl_1__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice0 */
#define glk__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice0 */
#define glk__tdl_1__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice2 */
#define glk__tdl_1__ps_thread02_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: NonPS Thread Ready For Dispatch on Slice0 Subslice2 */
#define glk__tdl_1__non_ps_thread02_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice1 */
#define glk__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: SQ is full */
#define glk__tdl_1__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * GLK "TDL_2" metric set: counter reader aliases.
 *
 * Every macro in this run gives a GLK counter its own reader name by aliasing
 * a reader named for another platform / metric set (hsw__, bdw__ prefixes).
 * The target's name does not always describe the counter it serves here,
 * e.g. "GS Threads Dispatched" aliases hsw__render_basic__vs_threads__read
 * and "Thread Header Ready on Slice0 Subslice1 Port 0" aliases
 * bdw__render_pipe_profile__bc_bottleneck__read.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define glk__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define glk__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define glk__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define glk__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define glk__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define glk__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define glk__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define glk__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define glk__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define glk__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define glk__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define glk__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define glk__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define glk__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define glk__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define glk__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define glk__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define glk__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define glk__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define glk__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define glk__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define glk__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define glk__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define glk__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define glk__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define glk__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define glk__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define glk__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define glk__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define glk__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define glk__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define glk__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define glk__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define glk__tdl_2__l3_shader_throughput__read \
   bdw__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define glk__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 0 */
#define glk__tdl_2__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 1 */
#define glk__tdl_2__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice0 Port 0 */
#define glk__tdl_2__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 1 */
#define glk__tdl_2__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice2 Port 0 */
#define glk__tdl_2__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_2 :: Thread Header Ready on Slice0 Subslice1 Port 1 */
#define glk__tdl_2__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_2 :: SQ is full */
#define glk__tdl_2__gt_request_queue_full__read \
   bdw__hdc_and_sf__gt_request_queue_full__read

/*
 * GLK "Compute Metrics Extra" set: counter reader aliases.
 *
 * Each macro aliases a GLK reader name to a reader named for another
 * platform / metric set (hsw__, bdw__, sklgt2__ prefixes). The target's name
 * does not always describe the counter it serves here, e.g. "EU FPU1 Pipe
 * Active" aliases bdw__render_basic__eu_stall__read.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* Compute Metrics Extra set :: GPU Time Elapsed */
#define glk__compute_extra__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Extra set :: GPU Core Clocks */
#define glk__compute_extra__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency */
#define glk__compute_extra__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Extra set :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__compute_extra__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Extra set :: EU FPU1 Pipe Active */
#define glk__compute_extra__fpu1_active__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Extra set :: EU FPU1 Pipe Active including Ext Math */
#define glk__compute_extra__fpu1_active_adjusted__read \
   sklgt2__compute_extra__fpu1_active_adjusted__read

/*
 * GLK "MDAPI testing" set: counter reader aliases.
 *
 * Each macro aliases a GLK reader name to a reader named for another
 * platform / metric set (hsw__, bdw__ prefixes). TestCounter0..TestCounter8
 * alias readers named after hsw compute_extended / memory_reads counters, so
 * the target names say nothing about what the test counters measure.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* MDAPI testing set :: GPU Time Elapsed */
#define glk__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* MDAPI testing set :: GPU Core Clocks */
#define glk__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* MDAPI testing set :: AVG GPU Core Frequency */
#define glk__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* MDAPI testing set :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* MDAPI testing set :: TestCounter0 */
#define glk__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* MDAPI testing set :: TestCounter1 */
#define glk__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* MDAPI testing set :: TestCounter2 */
#define glk__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* MDAPI testing set :: TestCounter3 */
#define glk__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* MDAPI testing set :: TestCounter4 */
#define glk__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* MDAPI testing set :: TestCounter5 */
#define glk__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* MDAPI testing set :: TestCounter6 */
#define glk__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* MDAPI testing set :: TestCounter7 */
#define glk__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* MDAPI testing set :: TestCounter8 */
#define glk__test_oa__counter8__read \
   hsw__memory_reads__gpu_core_clocks__read

/*
 * GLK "PMA Stall" metric set: counter reader aliases.
 *
 * Each macro aliases a GLK reader name to a reader named for another
 * platform / metric set (hsw__, bdw__, sklgt2__ prefixes); no GLK-specific
 * reader body is defined in this run.
 */

/* Metric set PMA Stall :: GPU Time Elapsed */
#define glk__pma__stall__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set PMA Stall :: GPU Core Clocks */
#define glk__pma__stall__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set PMA Stall :: AVG GPU Core Frequency */
#define glk__pma__stall__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set PMA Stall :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define glk__pma__stall__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set PMA Stall :: STC PMA stall */
#define glk__pma__stall__stc_pma_stall__read \
   sklgt2__pma__stall__stc_pma_stall__read

/*
 * ICL "Render Metrics Basic" set: counter reader aliases.
 *
 * Each macro aliases an ICL reader name to a reader named for another
 * platform / metric set (hsw__, bdw__ prefixes). The target's name does not
 * always describe the counter it serves here, e.g. "PS Send Pipeline Active"
 * aliases bdw__render_basic__ps_fpu0_active__read, and "Sampler00 Busy" and
 * "Samplers Busy" both alias bdw__render_pipe_profile__hs_stall__read.
 * NOTE(review): presumably the generator reuses an earlier reader whose
 * generated body is identical -- confirm against gen_perf.py before treating
 * any of these targets as a mistake.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define icl__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define icl__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define icl__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency
 * (the __max alias; the __read alias is directly above) */
#define icl__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define icl__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define icl__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define icl__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define icl__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define icl__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define icl__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define icl__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define icl__render_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define icl__render_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Both FPU Pipes Active */
#define icl__render_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic set :: VS FPU0 Pipe Active */
#define icl__render_basic__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic set :: VS FPU1 Pipe Active */
#define icl__render_basic__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic set :: VS Send Pipe Active */
#define icl__render_basic__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define icl__render_basic__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define icl__render_basic__ps_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define icl__render_basic__ps_send_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: Sampler00 Busy */
#define icl__render_basic__sampler00_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics Basic set :: Samplers Busy
 * (same alias target as Sampler00 Busy above) */
#define icl__render_basic__samplers_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics Basic set :: Sampler00 Bottleneck */
#define icl__render_basic__sampler00_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define icl__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define icl__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define icl__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define icl__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define icl__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define icl__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define icl__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define icl__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define icl__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses
 *
 * Returns twice the sum of accumulated counters C4 and C5. `perf` is not
 * consulted.
 */
static uint64_t
icl__render_basic__sampler_l1_misses__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ C 5 READ UADD 2 UMUL */
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   return (c4 + c5) * 2;
}

/* The four macros below make the ICL "Render Metrics Basic" reader names
 * expand to the identically named bdw__render_basic__* readers; no new
 * function body is emitted for them.
 */

/* Render Metrics Basic set :: SLM Bytes Read */
#define icl__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define icl__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define icl__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define icl__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Shader Throughput
 *
 * Returns the Shader Memory Accesses metric of this set multiplied by 64.
 * All three arguments are forwarded unchanged to that reader.
 */
static uint64_t
icl__render_basic__l3_shader_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: $ShaderMemoryAccesses 64 UMUL */
   return icl__render_basic__shader_memory_accesses__read(perf, query, results) * 64;
}

/* Render Metrics Basic set :: Shader Barrier Messages
 *
 * Alias only: the name expands to an existing reader.
 * NOTE(review): the target is named after "early depth test fails";
 * presumably it is reused because its equation is identical -- confirm
 * against the generator.
 */
#define icl__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Read Throughput
 *
 * Returns 64 times the sum of accumulated counters B1, B3, B4 and B5.
 * `perf` is not consulted.
 */
static uint64_t
icl__render_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 1 READ B 3 READ UADD B 4 READ UADD B 5 READ UADD UMUL */
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];

   /* Unsigned 64-bit sum; the grouping mirrors the equation's left-to-right order. */
   const uint64_t total = ((b1 + b3) + b4) + b5;

   return 64 * total;
}

/* Render Metrics Basic set :: GTI Write Throughput
 *
 * Returns 64 times the sum of accumulated counters B0 and B2. `perf` is not
 * consulted.
 */
static uint64_t
icl__render_basic__gti_write_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 0 READ B 2 READ UADD UMUL */
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b2 = results->accumulator[query->b_offset + 2];

   return 64 * (b0 + b2);
}

/* The macros in this run do not emit any new function body: each one makes
 * an ICL reader name expand to a reader that already exists under another
 * name.
 *
 * NOTE(review): some targets are named after a different metric (e.g.
 * gs_threads -> hsw vs_threads, fpu0_active -> vs_fpu0_active). Presumably
 * the generator shares one reader between metrics whose equations are
 * identical -- confirm against the generator.
 */

/* Render Metrics Basic set :: Samplers Bottleneck */
#define icl__render_basic__sampler_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define icl__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define icl__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define icl__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency
 * (the __max companion of the __read alias above)
 */
#define icl__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define icl__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define icl__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define icl__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define icl__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define icl__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define icl__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define icl__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define icl__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define icl__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define icl__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define icl__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define icl__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU AVG IPC Rate
 *
 * Computes A9 / ((A10 + A11) - A9) + 1 from the accumulated A counters.
 * The quotient is taken as 0 when the divisor is zero, so the function
 * returns 1 in that case. `perf` is not consulted.
 */
static float
icl__compute_basic__eu_avg_ipc_rate__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: A 9 READ  A 10 READ  A 11 READ FADD  A 9 READ FSUB FDIV 1 FADD */
   const uint64_t a9 = results->accumulator[query->a_offset + 9];
   const uint64_t a10 = results->accumulator[query->a_offset + 10];
   const uint64_t a11 = results->accumulator[query->a_offset + 11];

   /* A10 + A11 is added as unsigned integers and only then widened to double. */
   const double pair_sum = a10 + a11;
   const double divisor = pair_sum - a9;
   const double dividend = a9;

   double quotient = 0;
   if (divisor)
      quotient = dividend / divisor;

   const double rate = quotient + 1;

   return rate;
}

/* Compute Metrics Basic set :: EU Send Pipe Active
 *
 * Alias only: the name expands to an existing reader.
 * NOTE(review): the target is named after "vs_send_active"; presumably it
 * is reused because its equation is identical -- confirm against the
 * generator.
 */
#define icl__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy
 *
 * Evaluates, in this order:
 *   1. 8 * A13 divided by perf->sys_vars.eu_threads_count (floating point),
 *   2. that value truncated to an integer and divided by
 *      perf->sys_vars.n_eus (unsigned integer division),
 *   3. multiplied by 100,
 *   4. divided by the GPU Core Clocks metric of this set (floating point).
 * Every division yields 0 when its divisor is zero.
 */
static float
icl__compute_basic__eu_thread_occupancy__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: 8 A 13 READ FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a13 = results->accumulator[query->a_offset + 13];

   /* The product is formed in unsigned integer arithmetic, then widened. */
   const double scaled = 8 * a13;
   const double threads = perf->sys_vars.eu_threads_count;
   const double per_thread = threads ? scaled / threads : 0;

   /* Integer stage: the floating-point value is truncated here. */
   const uint64_t per_thread_int = per_thread;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   const uint64_t per_eu = eu_count ? per_thread_int / eu_count : 0;
   const uint64_t times_100 = per_eu * 100;

   const double dividend = times_100;
   const double clocks = icl__compute_basic__gpu_core_clocks__read(perf, query, results);
   const double occupancy = clocks ? dividend / clocks : 0;

   return occupancy;
}

/* The macros in this run do not emit any new function body: each one makes
 * an ICL "Compute Metrics Basic" reader name expand to a reader that already
 * exists under another name (mostly the matching bdw__render_basic__*
 * reader).
 *
 * NOTE(review): shader_barriers expands to a reader named after "early depth
 * test fails"; presumably it is reused because its equation is identical --
 * confirm against the generator.
 */

/* Compute Metrics Basic set :: Rasterized Pixels */
#define icl__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define icl__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define icl__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define icl__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define icl__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define icl__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define icl__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define icl__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define icl__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define icl__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define icl__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define icl__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define icl__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define icl__compute_basic__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define icl__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read
 *
 * Returns (C7 + C6) * perf->sys_vars.n_eu_sub_slices * 32, evaluated step by
 * step with each intermediate stored as an unsigned 64-bit value.
 */
static uint64_t
icl__compute_basic__typed_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ C 6 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL */
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];

   const uint64_t pair_sum = c7 + c6;
   const uint64_t all_subslices = pair_sum * perf->sys_vars.n_eu_sub_slices;

   return all_subslices * 32;
}

/* Compute Metrics Basic set :: Typed Bytes Written
 *
 * Returns (C5 + C4) * perf->sys_vars.n_eu_sub_slices * 32, evaluated step by
 * step with each intermediate stored as an unsigned 64-bit value.
 */
static uint64_t
icl__compute_basic__typed_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ C 4 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL */
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   const uint64_t pair_sum = c5 + c4;
   const uint64_t all_subslices = pair_sum * perf->sys_vars.n_eu_sub_slices;

   return all_subslices * 32;
}

/* Compute Metrics Basic set :: Untyped Bytes Read
 *
 * Returns (C3 + C2) * perf->sys_vars.n_eu_sub_slices * 32, evaluated step by
 * step with each intermediate stored as an unsigned 64-bit value.
 */
static uint64_t
icl__compute_basic__untyped_bytes_read__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: C 3 READ C 2 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL */
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   const uint64_t pair_sum = c3 + c2;
   const uint64_t all_subslices = pair_sum * perf->sys_vars.n_eu_sub_slices;

   return all_subslices * 32;
}

/* Compute Metrics Basic set :: Untyped Writes
 *
 * Returns (C1 + C0) * perf->sys_vars.n_eu_sub_slices * 32, evaluated step by
 * step with each intermediate stored as an unsigned 64-bit value.
 */
static uint64_t
icl__compute_basic__untyped_bytes_written__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: C 1 READ C 0 READ UADD $EuSubslicesTotalCount UMUL 32 UMUL */
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c0 = results->accumulator[query->c_offset + 0];

   const uint64_t pair_sum = c1 + c0;
   const uint64_t all_subslices = pair_sum * perf->sys_vars.n_eu_sub_slices;

   return all_subslices * 32;
}

/* Compute Metrics Basic set :: Typed Atomics Accesses
 *
 * Computes (B4 + B5) / 2 * perf->sys_vars.n_eu_sub_slices in double
 * precision and converts the result to an unsigned 64-bit integer on return.
 */
static uint64_t
icl__compute_basic__typed_atomics__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: B 4 READ B 5 READ FADD 2 FDIV $EuSubslicesTotalCount FMUL */
   const uint64_t b4 = results->accumulator[query->b_offset + 4];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];

   /* B4 + B5 is added as unsigned integers and only then widened to double. */
   const double pair_sum = b4 + b5;

   /* The divisor is the constant 2, so no zero-divisor guard is needed. */
   const double halved = pair_sum / 2.0;
   const double scaled = halved * perf->sys_vars.n_eu_sub_slices;

   return scaled;
}

/* Compute Metrics Basic set :: GTI Read Throughput
 *
 * Returns 64 times the sum of accumulated counters B1, B3, B6 and B7.
 * `perf` is not consulted.
 */
static uint64_t
icl__compute_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 1 READ B 3 READ UADD B 6 READ UADD B 7 READ UADD UMUL */
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t b7 = results->accumulator[query->b_offset + 7];

   /* Unsigned 64-bit sum; the grouping mirrors the equation's left-to-right order. */
   const uint64_t total = ((b1 + b3) + b6) + b7;

   return 64 * total;
}

/* The macros in this run do not emit any new function body: each one makes
 * an ICL reader name expand to a reader that already exists under another
 * name.
 *
 * NOTE(review): many Compute Extended targets are named after a different
 * metric (e.g. typed_atomics00 -> hsw memory_reads gpu_core_clocks,
 * untyped_reads00 -> hsw typed_atomics0). Presumably the generator shares
 * one reader between metrics whose equations are identical -- confirm
 * against the generator before relying on a target's name.
 */

/* Compute Metrics Basic set :: GTI Write Throughput */
#define icl__compute_basic__gti_write_throughput__read \
   icl__render_basic__gti_write_throughput__read

/* Compute Extended metrics set :: GPU Time Elapsed */
#define icl__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Extended metrics set :: GPU Core Clocks */
#define icl__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Extended metrics set :: AVG GPU Core Frequency */
#define icl__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Extended metrics set :: AVG GPU Core Frequency
 * (the __max companion of the __read alias above)
 */
#define icl__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Extended metrics set :: GPU Busy */
#define icl__compute_extended__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Extended metrics set :: CS Threads Dispatched */
#define icl__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Extended metrics set :: Typed Atomics 00 */
#define icl__compute_extended__typed_atomics00__read \
   hsw__memory_reads__gpu_core_clocks__read

/* Compute Extended metrics set :: Typed Reads 00 */
#define icl__compute_extended__typed_reads00__read \
   hsw__memory_reads__llc_read_accesses__read

/* Compute Extended metrics set :: Typed Writes 00 */
#define icl__compute_extended__typed_writes00__read \
   hsw__memory_reads__gti_memory_reads__read

/* Compute Extended metrics set :: Untyped Reads 00 */
#define icl__compute_extended__untyped_reads00__read \
   hsw__compute_extended__typed_atomics0__read

/* Compute Extended metrics set :: Untyped Writes 00 */
#define icl__compute_extended__untyped_writes00__read \
   hsw__compute_extended__untyped_reads0__read

/* Compute Extended metrics set :: Eu Typed Reads 00 */
#define icl__compute_extended__eu_typed_reads00__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Compute Extended metrics set :: Eu Typed Writes 00 */
#define icl__compute_extended__eu_typed_writes00__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Compute Extended metrics set :: Eu Typed Atomics 00 */
#define icl__compute_extended__eu_typed_atomics00__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Compute Extended metrics set :: Eu A32 Untyped Reads 00 */
#define icl__compute_extended__eu_a32_untyped_reads00__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Compute Extended metrics set :: Eu A32 Untyped Writes 00
 *
 * Returns the sum of accumulated counters B7 and B1. `perf` is not
 * consulted.
 */
static uint64_t
icl__compute_extended__eu_a32_untyped_writes00__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: B 7 READ B 1 READ UADD */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];

   return b7 + b1;
}

/* Compute Extended metrics set :: Eu A64 Untyped Reads 00
 *
 * Returns B5 + C1 + B6 as a float. `perf` is not consulted.
 */
static float
icl__compute_extended__eu_a64_untyped_reads00__read(UNUSED struct intel_perf_config *perf,
                                                    const struct intel_perf_query_info *query,
                                                    const struct intel_perf_query_result *results)
{
   /* RPN equation: B 5 READ C 1 READ FADD B 6 READ FADD */
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   /* B5 + C1 is added as unsigned integers; B6 is then added in double. */
   const double first_pair = b5 + c1;
   const double total = first_pair + b6;

   return total;
}

/* Compute Extended metrics set :: Eu A64 Untyped Writes 00
 *
 * Alias only: the name expands to an existing reader.
 * NOTE(review): the target is named after "gpu_core_clocks"; presumably it
 * is reused because its equation is identical -- confirm against the
 * generator.
 */
#define icl__compute_extended__eu_a64_untyped_writes00__read \
   hsw__render_basic__gpu_core_clocks__read

/* Compute Extended metrics set :: TypedAtomicsPerCacheLine
 *
 * Returns the Eu Typed Atomics 00 metric divided by the Typed Atomics 00
 * metric of this set, or 0 when the divisor is zero.
 */
static float
icl__compute_extended__typed_atomics_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                          const struct intel_perf_query_info *query,
                                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: $EuTypedAtomics00 $TypedAtomics00 FDIV */
   const double numerator = icl__compute_extended__eu_typed_atomics00__read(perf, query, results);
   const double denominator = icl__compute_extended__typed_atomics00__read(perf, query, results);

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Extended metrics set :: TypedReadsPerCacheLine
 *
 * Returns the Eu Typed Reads 00 metric divided by the Typed Reads 00 metric
 * of this set, or 0 when the divisor is zero.
 */
static float
icl__compute_extended__typed_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                        const struct intel_perf_query_info *query,
                                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: $EuTypedReads00 $TypedReads00 FDIV */
   const double numerator = icl__compute_extended__eu_typed_reads00__read(perf, query, results);
   const double denominator = icl__compute_extended__typed_reads00__read(perf, query, results);

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Extended metrics set :: TypedWritesPerCacheLine
 *
 * Returns the Eu Typed Writes 00 metric divided by the Typed Writes 00
 * metric of this set, or 0 when the divisor is zero.
 */
static float
icl__compute_extended__typed_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                         const struct intel_perf_query_info *query,
                                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: $EuTypedWrites00 $TypedWrites00 FDIV */
   const double numerator = icl__compute_extended__eu_typed_writes00__read(perf, query, results);
   const double denominator = icl__compute_extended__typed_writes00__read(perf, query, results);

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Extended metrics set :: UntypedReadsPerCacheLine
 *
 * Returns (B0 + ((B5 + C1) + B6)) / C4, or 0 when C4 is zero. `perf` is not
 * consulted.
 */
static float
icl__compute_extended__untyped_reads_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                          const struct intel_perf_query_info *query,
                                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ B 5 READ C 1 READ FADD B 6 READ FADD FADD C 4 READ FDIV */
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   /* B5 + C1 is added as unsigned integers; later additions are in double. */
   const double inner_pair = b5 + c1;
   const double inner_sum = inner_pair + b6;
   const double numerator = b0 + inner_sum;
   const double denominator = c4;

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* Compute Extended metrics set :: UntypedWritesPerCacheLine
 *
 * Returns ((B7 + B1) + C2) / C3, or 0 when C3 is zero. `perf` is not
 * consulted.
 */
static float
icl__compute_extended__untyped_writes_per_cache_line__read(UNUSED struct intel_perf_config *perf,
                                                           const struct intel_perf_query_info *query,
                                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: B 7 READ B 1 READ FADD C 2 READ FADD C 3 READ FDIV */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t c3 = results->accumulator[query->c_offset + 3];

   /* B7 + B1 is added as unsigned integers; C2 is then added in double. */
   const double first_pair = b7 + b1;
   const double numerator = first_pair + c2;
   const double denominator = c3;

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   return ratio;
}

/* The macros in this run do not emit any new function body: each one makes
 * an ICL "Compute Metrics L3 Cache" reader name expand to a reader that
 * already exists under another name.
 *
 * NOTE(review): several targets are named after a different metric (e.g.
 * eu_ternary_fpu0_instruction -> ps_fpu0_active, eu_binary_fpu1_instruction
 * -> ps_eu_both_fpu_active, shader_barriers -> early_depth_test_fails).
 * Presumably the generator shares one reader between metrics whose equations
 * are identical -- confirm against the generator before relying on a
 * target's name.
 */

/* Compute Metrics L3 Cache metrics set :: GPU Time Elapsed */
#define icl__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache metrics set :: GPU Core Clocks */
#define icl__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache metrics set :: AVG GPU Core Frequency */
#define icl__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache metrics set :: AVG GPU Core Frequency
 * (the __max companion of the __read alias above)
 */
#define icl__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache metrics set :: GPU Busy */
#define icl__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache metrics set :: VS Threads Dispatched */
#define icl__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metrics set :: HS Threads Dispatched */
#define icl__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache metrics set :: DS Threads Dispatched */
#define icl__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache metrics set :: GS Threads Dispatched */
#define icl__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache metrics set :: FS Threads Dispatched */
#define icl__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache metrics set :: CS Threads Dispatched */
#define icl__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache metrics set :: EU Active */
#define icl__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache metrics set :: EU Stall */
#define icl__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache metrics set :: EU AVG IPC Rate */
#define icl__compute_l3_cache__eu_avg_ipc_rate__read \
   icl__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache metrics set :: EU Both FPU Pipes Active */
#define icl__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Pipe Active */
#define icl__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Pipe Active */
#define icl__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache metrics set :: EU Send Pipe Active */
#define icl__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Hybrid Instruction */
#define icl__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Hybrid Instruction */
#define icl__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Ternary Instruction */
#define icl__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Ternary Instruction */
#define icl__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Binary Instruction */
#define icl__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Binary Instruction */
#define icl__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache metrics set :: EU FPU0 Move Instruction */
#define icl__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache metrics set :: EU FPU1 Move Instruction */
#define icl__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache metrics set :: Rasterized Pixels */
#define icl__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache metrics set :: Early Hi-Depth Test Fails */
#define icl__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: Early Depth Test Fails */
#define icl__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: Samples Killed in FS */
#define icl__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache metrics set :: Pixels Failing Tests */
#define icl__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache metrics set :: Samples Written */
#define icl__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache metrics set :: Samples Blended */
#define icl__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache metrics set :: Sampler Accesses */
#define icl__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache metrics set :: Sampler Texels */
#define icl__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache metrics set :: Sampler Texels Misses */
#define icl__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache metrics set :: SLM Bytes Read */
#define icl__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache metrics set :: SLM Bytes Written */
#define icl__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache metrics set :: Shader Memory Accesses */
#define icl__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache metrics set :: Shader Atomic Memory Accesses */
#define icl__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache metrics set :: Shader Barrier Messages */
#define icl__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank0 Accesses
 *
 * Returns accumulated counter C7 multiplied by 2. `perf` is not consulted.
 */
static uint64_t
icl__compute_l3_cache__l3_bank00_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ 2 UMUL */
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   return c7 * 2;
}

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank1 Accesses
 *
 * Alias only: the name expands to an existing reader.
 * NOTE(review): the target is named after "llc_wr_accesses"; presumably it
 * is reused because its equation is identical -- confirm against the
 * generator.
 */
#define icl__compute_l3_cache__l3_bank01_accesses__read \
   hsw__memory_writes__llc_wr_accesses__read

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank2 Accesses
 *
 * Returns accumulated counter C5 multiplied by 2. `perf` is not consulted.
 */
static uint64_t
icl__compute_l3_cache__l3_bank02_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ 2 UMUL */
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   return c5 * 2;
}

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank3 Accesses
 *
 * Returns accumulated counter C4 multiplied by 2. `perf` is not consulted.
 */
static uint64_t
icl__compute_l3_cache__l3_bank03_accesses__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ 2 UMUL */
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   return c4 * 2;
}

/* The four macros below do not emit any new function body: each ICL L3 bank
 * reader name expands to a reader that already exists under another name.
 *
 * NOTE(review): the targets are named after different metrics or banks
 * (gti_ring_accesses, bank10, bank01, bank00). Presumably the generator
 * shares one reader between metrics whose equations are identical -- confirm
 * against the generator.
 */

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank4 Accesses */
#define icl__compute_l3_cache__l3_bank04_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank5 Accesses */
#define icl__compute_l3_cache__l3_bank05_accesses__read \
   bdw__compute_l3_cache__l3_bank10_accesses__read

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank6 Accesses */
#define icl__compute_l3_cache__l3_bank06_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache metrics set :: Slice0 L3 Bank7 Accesses */
#define icl__compute_l3_cache__l3_bank07_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache metrics set :: L3 Accesses
 *
 * Returns the sum of the eight Slice0 L3 bank readers of this set (banks 00
 * through 07), accumulated in bank order. All three arguments are forwarded
 * unchanged to every bank reader.
 */
static uint64_t
icl__compute_l3_cache__l3_accesses__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: $L3Bank00Accesses $L3Bank01Accesses UADD $L3Bank02Accesses UADD $L3Bank03Accesses UADD $L3Bank04Accesses UADD $L3Bank05Accesses UADD $L3Bank06Accesses UADD $L3Bank07Accesses UADD */
   uint64_t total = icl__compute_l3_cache__l3_bank00_accesses__read(perf, query, results) +
                    icl__compute_l3_cache__l3_bank01_accesses__read(perf, query, results);

   total += icl__compute_l3_cache__l3_bank02_accesses__read(perf, query, results);
   total += icl__compute_l3_cache__l3_bank03_accesses__read(perf, query, results);
   total += icl__compute_l3_cache__l3_bank04_accesses__read(perf, query, results);
   total += icl__compute_l3_cache__l3_bank05_accesses__read(perf, query, results);
   total += icl__compute_l3_cache__l3_bank06_accesses__read(perf, query, results);
   total += icl__compute_l3_cache__l3_bank07_accesses__read(perf, query, results);

   return total;
}

/* Compute Metrics L3 Cache metrics set :: L3 Lookup Accesses w/o IC
 *
 * Returns (B7 + B6) * 2 + A32, all taken from the accumulated counters.
 */
static uint64_t
icl__compute_l3_cache__l3_lookups__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: B 7 READ B 6 READ UADD 2 UMUL A 32 READ UADD */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t a32 = results->accumulator[query->a_offset + 32];

   return (b7 + b6) * 2 + a32;
}

/* Compute Metrics L3 Cache metrics set :: L3 Sampler Throughput
 *
 * Returns 64 * ((B7 + B6) * 2), computed from the accumulated B counters.
 */
static uint64_t
icl__compute_l3_cache__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                                   const struct intel_perf_query_info *query,
                                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 7 READ B 6 READ UADD 2 UMUL UMUL */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t doubled_sum = (b7 + b6) * 2;

   return 64 * doubled_sum;
}

/* Compute Metrics L3 Cache metrics set :: L3 Shader Throughput
 *
 * Alias only: no reader body is emitted here; the name resolves to
 * icl__render_basic__l3_shader_throughput__read.
 */
#define icl__compute_l3_cache__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache metrics set :: L3 Total Throughput
 *
 * Returns this set's L3 Accesses value multiplied by 64.
 */
static uint64_t
icl__compute_l3_cache__l3_total_throughput__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: $L3Accesses 64 UMUL */
   const uint64_t l3_accesses =
      icl__compute_l3_cache__l3_accesses__read(perf, query, results);

   return l3_accesses * 64;
}

/* Compute Metrics L3 Cache metrics set :: GTI Read Throughput
 *
 * Reads accumulated counter B2 and returns it multiplied by 64.
 */
static uint64_t
icl__compute_l3_cache__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: B 2 READ 64 UMUL */
   const uint64_t b2 = results->accumulator[query->b_offset + 2];

   return b2 * 64;
}

/* Compute Metrics L3 Cache metrics set :: GTI Write Throughput
 *
 * Reads accumulated counter B3 and returns it multiplied by 64.
 */
static uint64_t
icl__compute_l3_cache__gti_write_throughput__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: B 3 READ 64 UMUL */
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   return b3 * 64;
}

/* icl "Render Metrics set for 3D Pipeline Profile" reader aliases.
 *
 * Every macro in this run maps one icl__render_pipe_profile__* reader name
 * onto a reader name from another metric set (hsw__*, bdw__* or
 * icl__render_basic__*); no reader body is emitted for these counters here.
 *
 * NOTE(review): several targets are named after a different metric than the
 * one being aliased (e.g. gs_threads -> hsw__render_basic__vs_threads__read,
 * vf_bottleneck -> bdw__render_pipe_profile__bc_bottleneck__read). Presumably
 * the generator reuses an earlier reader with an identical equation - confirm
 * in the generator rather than "fixing" the names by hand.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define icl__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define icl__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define icl__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency (__max variant) */
#define icl__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define icl__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define icl__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define icl__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define icl__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define icl__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define icl__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define icl__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define icl__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define icl__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define icl__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define icl__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define icl__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define icl__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define icl__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define icl__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define icl__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define icl__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define icl__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define icl__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define icl__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define icl__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define icl__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define icl__render_pipe_profile__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define icl__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define icl__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define icl__render_pipe_profile__vs_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define icl__render_pipe_profile__hs_bottleneck__read \
   bdw__render_pipe_profile__sf_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define icl__render_pipe_profile__ds_bottleneck__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define icl__render_pipe_profile__gs_bottleneck__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define icl__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define icl__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define icl__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define icl__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define icl__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define icl__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define icl__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define icl__render_pipe_profile__ds_stall__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define icl__render_pipe_profile__so_stall__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define icl__render_pipe_profile__cl_stall__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define icl__render_pipe_profile__sf_stall__read \
   bdw__render_basic__sampler0_busy__read

/* icl "HDCAndSF" metric set reader aliases.
 *
 * Every macro in this run maps one icl__hdc_and_sf__* reader name onto a
 * reader name from another metric set (hsw__render_basic__*,
 * bdw__render_basic__* or icl__render_basic__*); no reader body is emitted
 * for these counters here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * being aliased (e.g. shader_barriers ->
 * hsw__render_basic__early_depth_test_fails__read). Presumably the generator
 * reuses an earlier reader with an identical equation - confirm in the
 * generator rather than "fixing" the names by hand.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define icl__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define icl__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define icl__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max variant) */
#define icl__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define icl__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define icl__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define icl__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define icl__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define icl__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define icl__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define icl__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define icl__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define icl__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define icl__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define icl__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define icl__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define icl__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define icl__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define icl__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define icl__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define icl__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define icl__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define icl__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define icl__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define icl__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define icl__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define icl__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define icl__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define icl__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define icl__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define icl__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define icl__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define icl__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define icl__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define icl__hdc_and_sf__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define icl__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3
 *
 * Returns 100 * (C0 - B7) / GpuCoreClocks, or 0 when the core-clock reading
 * is zero.
 *
 * The FSUB step is evaluated as a signed floating-point difference.
 * Subtracting the raw uint64_t counters directly wraps to a value near 2^64
 * whenever B7 exceeds C0, and scaling that by 100 and storing it back into a
 * uint64_t is an out-of-range floating-to-integer conversion (undefined
 * behaviour in C). Whenever C0 >= B7 and the scaled value fits in 64 bits the
 * result is the same as with the integer round-trip.
 *
 * NOTE(review): this file is generated; the same change presumably belongs
 * in the generator's FSUB/UMUL emitters - confirm before relying on it.
 */
static float
icl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: C 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = results->accumulator[query->c_offset + 0];
   const uint64_t subtrahend = results->accumulator[query->b_offset + 7];

   /* Subtract in whichever order cannot wrap, then restore the sign. */
   const double delta = minuend >= subtrahend ?
                        (double)(minuend - subtrahend) :
                        -(double)(subtrahend - minuend);
   const double scaled = delta * 100;
   const double core_clocks = icl__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set HDCAndSF :: Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3
 *
 * Returns 100 * (C2 - C1) / GpuCoreClocks, or 0 when the core-clock reading
 * is zero.
 *
 * The FSUB step is evaluated as a signed floating-point difference.
 * Subtracting the raw uint64_t counters directly wraps to a value near 2^64
 * whenever C1 exceeds C2, and scaling that by 100 and storing it back into a
 * uint64_t is an out-of-range floating-to-integer conversion (undefined
 * behaviour in C). Whenever C2 >= C1 and the scaled value fits in 64 bits the
 * result is the same as with the integer round-trip.
 *
 * NOTE(review): this file is generated; the same change presumably belongs
 * in the generator's FSUB/UMUL emitters - confirm before relying on it.
 */
static float
icl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: C 2 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = results->accumulator[query->c_offset + 2];
   const uint64_t subtrahend = results->accumulator[query->c_offset + 1];

   /* Subtract in whichever order cannot wrap, then restore the sign. */
   const double delta = minuend >= subtrahend ?
                        (double)(minuend - subtrahend) :
                        -(double)(subtrahend - minuend);
   const double scaled = delta * 100;
   const double core_clocks = icl__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set HDCAndSF :: Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3
 *
 * Returns 100 * (C4 - C3) / GpuCoreClocks, or 0 when the core-clock reading
 * is zero.
 *
 * The FSUB step is evaluated as a signed floating-point difference.
 * Subtracting the raw uint64_t counters directly wraps to a value near 2^64
 * whenever C3 exceeds C4, and scaling that by 100 and storing it back into a
 * uint64_t is an out-of-range floating-to-integer conversion (undefined
 * behaviour in C). Whenever C4 >= C3 and the scaled value fits in 64 bits the
 * result is the same as with the integer round-trip.
 *
 * NOTE(review): this file is generated; the same change presumably belongs
 * in the generator's FSUB/UMUL emitters - confirm before relying on it.
 */
static float
icl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = results->accumulator[query->c_offset + 4];
   const uint64_t subtrahend = results->accumulator[query->c_offset + 3];

   /* Subtract in whichever order cannot wrap, then restore the sign. */
   const double delta = minuend >= subtrahend ?
                        (double)(minuend - subtrahend) :
                        -(double)(subtrahend - minuend);
   const double scaled = delta * 100;
   const double core_clocks = icl__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set HDCAndSF :: Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3
 *
 * Returns 100 * (C6 - C5) / GpuCoreClocks, or 0 when the core-clock reading
 * is zero.
 *
 * The FSUB step is evaluated as a signed floating-point difference.
 * Subtracting the raw uint64_t counters directly wraps to a value near 2^64
 * whenever C5 exceeds C6, and scaling that by 100 and storing it back into a
 * uint64_t is an out-of-range floating-to-integer conversion (undefined
 * behaviour in C). Whenever C6 >= C5 and the scaled value fits in 64 bits the
 * result is the same as with the integer round-trip.
 *
 * NOTE(review): this file is generated; the same change presumably belongs
 * in the generator's FSUB/UMUL emitters - confirm before relying on it.
 */
static float
icl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: C 6 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t minuend = results->accumulator[query->c_offset + 6];
   const uint64_t subtrahend = results->accumulator[query->c_offset + 5];

   /* Subtract in whichever order cannot wrap, then restore the sign. */
   const double delta = minuend >= subtrahend ?
                        (double)(minuend - subtrahend) :
                        -(double)(subtrahend - minuend);
   const double scaled = delta * 100;
   const double core_clocks = icl__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   /* Guard the FDIV against a zero clock count. */
   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set HDCAndSF :: Polygon Data Ready
 *
 * Alias only; resolves to bdw__render_basic__sampler0_busy__read.
 * NOTE(review): the target is named after a different metric; presumably it
 * is reused because its equation is identical - confirm in the generator.
 */
#define icl__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: SQ is full
 *
 * Alias only; resolves to bdw__render_pipe_profile__bc_bottleneck__read.
 * NOTE(review): the target is named after a different metric; presumably it
 * is reused because its equation is identical - confirm in the generator.
 */
#define icl__hdc_and_sf__gt_request_queue_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* icl "RasterizerAndPixelBackend" metric set reader aliases.
 *
 * Every macro in this run maps one icl__rasterizer_and_pixel_backend__*
 * reader name onto a reader name from another metric set
 * (hsw__render_basic__*, bdw__render_basic__*, bdw__render_pipe_profile__*
 * or icl__render_basic__*); no reader body is emitted for these counters
 * here.
 *
 * NOTE(review): several targets are named after a different metric than the
 * one being aliased (e.g. rasterizer0_input_available ->
 * bdw__render_basic__sampler0_busy__read). Presumably the generator reuses an
 * earlier reader with an identical equation - confirm in the generator rather
 * than "fixing" the names by hand.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define icl__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define icl__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define icl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max variant) */
#define icl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define icl__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define icl__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define icl__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define icl__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define icl__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define icl__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define icl__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define icl__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define icl__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define icl__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define icl__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define icl__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define icl__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define icl__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define icl__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define icl__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define icl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define icl__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define icl__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define icl__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define icl__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define icl__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define icl__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define icl__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define icl__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define icl__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define icl__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define icl__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define icl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define icl__rasterizer_and_pixel_backend__pixel_data00_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Post-EarlyZ Pixel Data Ready */
#define icl__rasterizer_and_pixel_backend__pixel_data01_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define icl__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 PS Output Available */
#define icl__rasterizer_and_pixel_backend__ps_output01_available__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define icl__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Pixel Values Ready */
#define icl__rasterizer_and_pixel_backend__pixel_values01_ready__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define icl__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* icl "L3_1" metric set reader aliases.
 *
 * Every macro in this run maps one icl__l3_1__* reader name onto a reader
 * name from another metric set (hsw__render_basic__*, bdw__render_basic__*,
 * bdw__render_pipe_profile__* or icl__render_basic__*); no reader body is
 * emitted for these counters here.
 *
 * NOTE(review): some targets are named after a different metric than the one
 * being aliased (e.g. l30_bank0_active ->
 * bdw__render_pipe_profile__vf_bottleneck__read). Presumably the generator
 * reuses an earlier reader with an identical equation - confirm in the
 * generator rather than "fixing" the names by hand.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define icl__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define icl__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define icl__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency (__max variant) */
#define icl__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define icl__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define icl__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define icl__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define icl__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define icl__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define icl__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define icl__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define icl__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define icl__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define icl__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define icl__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define icl__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define icl__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define icl__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define icl__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define icl__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define icl__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define icl__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define icl__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define icl__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define icl__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define icl__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define icl__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define icl__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define icl__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define icl__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define icl__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define icl__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define icl__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define icl__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define icl__l3_1__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define icl__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define icl__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define icl__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank2 Active */
#define icl__l3_1__l30_bank2_active__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank3 Active */
#define icl__l3_1__l30_bank3_active__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank4 Active */
#define icl__l3_1__l30_bank4_active__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank5 Active */
#define icl__l3_1__l30_bank5_active__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank6 Active */
#define icl__l3_1__l30_bank6_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank7 Active */
#define icl__l3_1__l30_bank7_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define icl__l3_1__gt_request_queue_full__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/*
 * ICL "L3_2" (L2Bank0 stalled) metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__l3_2__<counter>__read (or __max) name
 * onto an accessor that carries the name of another platform / metric set.
 * icl__l3_2__gpu_core_clocks__read is additionally called directly by
 * icl__l3_2__l30_bank0_stalled__read.
 *
 * NOTE(review): from eu_thread_occupancy onwards the targets are named after
 * a different counter than the alias (e.g. vs_fpu0_active ->
 * bdw__render_basic__vs_fpu1_active__read). Presumably the generator reuses
 * the first accessor that has an identical equation, so the target's name
 * says nothing about this counter -- confirm against gen_perf.py.
 */

/* L2Bank0 stalled metric set :: GPU Time Elapsed */
#define icl__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L2Bank0 stalled metric set :: GPU Core Clocks */
#define icl__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L2Bank0 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L2Bank0 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L2Bank0 stalled metric set :: GPU Busy */
#define icl__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L2Bank0 stalled metric set :: VS Threads Dispatched */
#define icl__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L2Bank0 stalled metric set :: HS Threads Dispatched */
#define icl__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L2Bank0 stalled metric set :: DS Threads Dispatched */
#define icl__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L2Bank0 stalled metric set :: GS Threads Dispatched */
#define icl__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L2Bank0 stalled metric set :: FS Threads Dispatched */
#define icl__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L2Bank0 stalled metric set :: CS Threads Dispatched */
#define icl__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L2Bank0 stalled metric set :: Rasterized Pixels */
#define icl__l3_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* L2Bank0 stalled metric set :: Early Hi-Depth Test Fails */
#define icl__l3_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* L2Bank0 stalled metric set :: Early Depth Test Fails */
#define icl__l3_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* L2Bank0 stalled metric set :: Samples Killed in FS */
#define icl__l3_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* L2Bank0 stalled metric set :: Pixels Failing Tests */
#define icl__l3_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* L2Bank0 stalled metric set :: Samples Written */
#define icl__l3_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* L2Bank0 stalled metric set :: Samples Blended */
#define icl__l3_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* L2Bank0 stalled metric set :: EU Active */
#define icl__l3_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* L2Bank0 stalled metric set :: EU Stall */
#define icl__l3_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* L2Bank0 stalled metric set :: EU Both FPU Pipes Active */
#define icl__l3_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* L2Bank0 stalled metric set :: EU Thread Occupancy */
#define icl__l3_2__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* L2Bank0 stalled metric set :: VS FPU0 Pipe Active */
#define icl__l3_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* L2Bank0 stalled metric set :: VS FPU1 Pipe Active */
#define icl__l3_2__vs_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* L2Bank0 stalled metric set :: VS Send Pipe Active */
#define icl__l3_2__vs_send_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* L2Bank0 stalled metric set :: PS FPU0 Pipe Active */
#define icl__l3_2__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* L2Bank0 stalled metric set :: PS FPU1 Pipe Active */
#define icl__l3_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* L2Bank0 stalled metric set :: PS Send Pipeline Active */
#define icl__l3_2__ps_send_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* L2Bank0 stalled metric set :: FS Both FPU Active */
#define icl__l3_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_send_active__read

/* L2Bank0 stalled metric set :: Slice0 L3 Bank0 Stalled
 *
 * Averages the four B counters at b_offset + 0..3, scales the average by 100
 * (truncating to an unsigned integer) and divides by the GPU core clock
 * count. Yields 0 when the core clock count is 0.
 */
static float
icl__l3_2__l30_bank0_stalled__read(UNUSED struct intel_perf_config *perf,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: B 0 READ B 1 READ FADD B 2 READ FADD B 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];

   /* The first sum is formed in uint64_t; the remaining ones in double. */
   double bank_sum = b0 + b1;
   bank_sum += b2;
   bank_sum += b3;

   /* 4 FDIV: constant, non-zero divisor, so no zero guard is required. */
   const double bank_avg = bank_sum / 4.0;

   /* 100 UMUL: the product is truncated to an unsigned integer. */
   const uint64_t scaled = bank_avg * 100;

   const double core_clocks =
      icl__l3_2__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   const double stalled = scaled / core_clocks;
   return stalled;
}

/*
 * ICL "L3_3" (L2Bank1 stalled) metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__l3_3__<counter>__read (or __max) name
 * onto an accessor that carries the name of another platform / metric set.
 * The set-specific counter, l30_bank1_stalled, is aliased to
 * icl__l3_2__l30_bank0_stalled__read.
 *
 * NOTE(review): from eu_thread_occupancy onwards the targets are named after
 * a different counter than the alias (e.g. vs_fpu0_active ->
 * bdw__render_basic__vs_fpu1_active__read). Presumably the generator reuses
 * the first accessor that has an identical equation, so the target's name
 * says nothing about this counter -- confirm against gen_perf.py.
 */

/* L2Bank1 stalled metric set :: GPU Time Elapsed */
#define icl__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L2Bank1 stalled metric set :: GPU Core Clocks */
#define icl__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L2Bank1 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L2Bank1 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L2Bank1 stalled metric set :: GPU Busy */
#define icl__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L2Bank1 stalled metric set :: VS Threads Dispatched */
#define icl__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L2Bank1 stalled metric set :: HS Threads Dispatched */
#define icl__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L2Bank1 stalled metric set :: DS Threads Dispatched */
#define icl__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L2Bank1 stalled metric set :: GS Threads Dispatched */
#define icl__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L2Bank1 stalled metric set :: FS Threads Dispatched */
#define icl__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L2Bank1 stalled metric set :: CS Threads Dispatched */
#define icl__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L2Bank1 stalled metric set :: Rasterized Pixels */
#define icl__l3_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* L2Bank1 stalled metric set :: Early Hi-Depth Test Fails */
#define icl__l3_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* L2Bank1 stalled metric set :: Early Depth Test Fails */
#define icl__l3_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* L2Bank1 stalled metric set :: Samples Killed in FS */
#define icl__l3_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* L2Bank1 stalled metric set :: Pixels Failing Tests */
#define icl__l3_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* L2Bank1 stalled metric set :: Samples Written */
#define icl__l3_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* L2Bank1 stalled metric set :: Samples Blended */
#define icl__l3_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* L2Bank1 stalled metric set :: EU Active */
#define icl__l3_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* L2Bank1 stalled metric set :: EU Stall */
#define icl__l3_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* L2Bank1 stalled metric set :: EU Both FPU Pipes Active */
#define icl__l3_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* L2Bank1 stalled metric set :: EU Thread Occupancy */
#define icl__l3_3__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* L2Bank1 stalled metric set :: VS FPU0 Pipe Active */
#define icl__l3_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* L2Bank1 stalled metric set :: VS FPU1 Pipe Active */
#define icl__l3_3__vs_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* L2Bank1 stalled metric set :: VS Send Pipe Active */
#define icl__l3_3__vs_send_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* L2Bank1 stalled metric set :: PS FPU0 Pipe Active */
#define icl__l3_3__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* L2Bank1 stalled metric set :: PS FPU1 Pipe Active */
#define icl__l3_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* L2Bank1 stalled metric set :: PS Send Pipeline Active */
#define icl__l3_3__ps_send_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* L2Bank1 stalled metric set :: FS Both FPU Active */
#define icl__l3_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_send_active__read

/* L2Bank1 stalled metric set :: Slice0 L3 Bank1 Stalled */
#define icl__l3_3__l30_bank1_stalled__read \
   icl__l3_2__l30_bank0_stalled__read

/*
 * ICL "L3_4" (L2Bank4 stalled) metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__l3_4__<counter>__read (or __max) name
 * onto an accessor that carries the name of another platform / metric set.
 * The set-specific counter, l30_bank4_stalled, is aliased to
 * icl__l3_2__l30_bank0_stalled__read.
 *
 * NOTE(review): from eu_thread_occupancy onwards the targets are named after
 * a different counter than the alias (e.g. vs_fpu0_active ->
 * bdw__render_basic__vs_fpu1_active__read). Presumably the generator reuses
 * the first accessor that has an identical equation, so the target's name
 * says nothing about this counter -- confirm against gen_perf.py.
 */

/* L2Bank4 stalled metric set :: GPU Time Elapsed */
#define icl__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L2Bank4 stalled metric set :: GPU Core Clocks */
#define icl__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L2Bank4 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L2Bank4 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L2Bank4 stalled metric set :: GPU Busy */
#define icl__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L2Bank4 stalled metric set :: VS Threads Dispatched */
#define icl__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L2Bank4 stalled metric set :: HS Threads Dispatched */
#define icl__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L2Bank4 stalled metric set :: DS Threads Dispatched */
#define icl__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L2Bank4 stalled metric set :: GS Threads Dispatched */
#define icl__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L2Bank4 stalled metric set :: FS Threads Dispatched */
#define icl__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L2Bank4 stalled metric set :: CS Threads Dispatched */
#define icl__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L2Bank4 stalled metric set :: Rasterized Pixels */
#define icl__l3_4__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* L2Bank4 stalled metric set :: Early Hi-Depth Test Fails */
#define icl__l3_4__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* L2Bank4 stalled metric set :: Early Depth Test Fails */
#define icl__l3_4__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* L2Bank4 stalled metric set :: Samples Killed in FS */
#define icl__l3_4__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* L2Bank4 stalled metric set :: Pixels Failing Tests */
#define icl__l3_4__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* L2Bank4 stalled metric set :: Samples Written */
#define icl__l3_4__samples_written__read \
   bdw__render_basic__samples_written__read

/* L2Bank4 stalled metric set :: Samples Blended */
#define icl__l3_4__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* L2Bank4 stalled metric set :: EU Active */
#define icl__l3_4__eu_active__read \
   bdw__render_basic__eu_active__read

/* L2Bank4 stalled metric set :: EU Stall */
#define icl__l3_4__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* L2Bank4 stalled metric set :: EU Both FPU Pipes Active */
#define icl__l3_4__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* L2Bank4 stalled metric set :: EU Thread Occupancy */
#define icl__l3_4__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* L2Bank4 stalled metric set :: VS FPU0 Pipe Active */
#define icl__l3_4__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* L2Bank4 stalled metric set :: VS FPU1 Pipe Active */
#define icl__l3_4__vs_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* L2Bank4 stalled metric set :: VS Send Pipe Active */
#define icl__l3_4__vs_send_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* L2Bank4 stalled metric set :: PS FPU0 Pipe Active */
#define icl__l3_4__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* L2Bank4 stalled metric set :: PS FPU1 Pipe Active */
#define icl__l3_4__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* L2Bank4 stalled metric set :: PS Send Pipeline Active */
#define icl__l3_4__ps_send_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* L2Bank4 stalled metric set :: FS Both FPU Active */
#define icl__l3_4__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_send_active__read

/* L2Bank4 stalled metric set :: Slice0 L3 Bank4 Stalled */
#define icl__l3_4__l30_bank4_stalled__read \
   icl__l3_2__l30_bank0_stalled__read

/*
 * ICL "L3_5" (L2Bank5 stalled) metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__l3_5__<counter>__read (or __max) name
 * onto an accessor that carries the name of another platform / metric set.
 * icl__l3_5__gpu_core_clocks__read is additionally called directly by
 * icl__l3_5__l30_bank5_stalled__read.
 *
 * NOTE(review): from eu_thread_occupancy onwards the targets are named after
 * a different counter than the alias (e.g. vs_fpu0_active ->
 * bdw__render_basic__vs_fpu1_active__read). Presumably the generator reuses
 * the first accessor that has an identical equation, so the target's name
 * says nothing about this counter -- confirm against gen_perf.py.
 */

/* L2Bank5 stalled metric set :: GPU Time Elapsed */
#define icl__l3_5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L2Bank5 stalled metric set :: GPU Core Clocks */
#define icl__l3_5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L2Bank5 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L2Bank5 stalled metric set :: AVG GPU Core Frequency */
#define icl__l3_5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L2Bank5 stalled metric set :: GPU Busy */
#define icl__l3_5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L2Bank5 stalled metric set :: VS Threads Dispatched */
#define icl__l3_5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L2Bank5 stalled metric set :: HS Threads Dispatched */
#define icl__l3_5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L2Bank5 stalled metric set :: DS Threads Dispatched */
#define icl__l3_5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L2Bank5 stalled metric set :: GS Threads Dispatched */
#define icl__l3_5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L2Bank5 stalled metric set :: FS Threads Dispatched */
#define icl__l3_5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L2Bank5 stalled metric set :: CS Threads Dispatched */
#define icl__l3_5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L2Bank5 stalled metric set :: Rasterized Pixels */
#define icl__l3_5__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* L2Bank5 stalled metric set :: Early Hi-Depth Test Fails */
#define icl__l3_5__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* L2Bank5 stalled metric set :: Early Depth Test Fails */
#define icl__l3_5__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* L2Bank5 stalled metric set :: Samples Killed in FS */
#define icl__l3_5__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* L2Bank5 stalled metric set :: Pixels Failing Tests */
#define icl__l3_5__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* L2Bank5 stalled metric set :: Samples Written */
#define icl__l3_5__samples_written__read \
   bdw__render_basic__samples_written__read

/* L2Bank5 stalled metric set :: Samples Blended */
#define icl__l3_5__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* L2Bank5 stalled metric set :: EU Active */
#define icl__l3_5__eu_active__read \
   bdw__render_basic__eu_active__read

/* L2Bank5 stalled metric set :: EU Stall */
#define icl__l3_5__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* L2Bank5 stalled metric set :: EU Both FPU Pipes Active */
#define icl__l3_5__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* L2Bank5 stalled metric set :: EU Thread Occupancy */
#define icl__l3_5__eu_thread_occupancy__read \
   bdw__vme_pipe__eu_thread_occupancy__read

/* L2Bank5 stalled metric set :: VS FPU0 Pipe Active */
#define icl__l3_5__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* L2Bank5 stalled metric set :: VS FPU1 Pipe Active */
#define icl__l3_5__vs_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* L2Bank5 stalled metric set :: VS Send Pipe Active */
#define icl__l3_5__vs_send_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* L2Bank5 stalled metric set :: PS FPU0 Pipe Active */
#define icl__l3_5__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* L2Bank5 stalled metric set :: PS FPU1 Pipe Active */
#define icl__l3_5__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* L2Bank5 stalled metric set :: PS Send Pipeline Active */
#define icl__l3_5__ps_send_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* L2Bank5 stalled metric set :: FS Both FPU Active */
#define icl__l3_5__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_send_active__read

/* L2Bank5 stalled metric set :: Slice0 L3 Bank5 Stalled
 *
 * Averages the four B counters at b_offset + 2, 3, 0, 1 (summed in that
 * order), scales the average by 100 (truncating to an unsigned integer) and
 * divides by the GPU core clock count. Yields 0 when the core clock count
 * is 0.
 */
static float
icl__l3_5__l30_bank5_stalled__read(UNUSED struct intel_perf_config *perf,
                                   const struct intel_perf_query_info *query,
                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: B 2 READ B 3 READ FADD B 0 READ FADD B 1 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b0 = results->accumulator[query->b_offset + 0];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];

   /* The first sum is formed in uint64_t; the remaining ones in double. */
   double bank_sum = b2 + b3;
   bank_sum += b0;
   bank_sum += b1;

   /* 4 FDIV: constant, non-zero divisor, so no zero guard is required. */
   const double bank_avg = bank_sum / 4.0;

   /* 100 UMUL: the product is truncated to an unsigned integer. */
   const uint64_t scaled = bank_avg * 100;

   const double core_clocks =
      icl__l3_5__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   const double stalled = scaled / core_clocks;
   return stalled;
}

/*
 * ICL "Sampler 1" metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__sampler_1__<counter>__read (or __max)
 * name onto an accessor that carries the name of another platform / metric
 * set; no new code is defined here.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. gs_threads -> hsw__render_basic__vs_threads__read, the
 * sampler0N_input_available counters -> bdw__render_pipe_profile__* and
 * bdw__render_basic__sampler*). Presumably the generator reuses the first
 * accessor that has an identical equation, so the target's name says nothing
 * about this counter -- confirm against gen_perf.py.
 */

/* Metric set Sampler 1 :: GPU Time Elapsed */
#define icl__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler 1 :: GPU Core Clocks */
#define icl__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler 1 :: AVG GPU Core Frequency */
#define icl__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler 1 :: AVG GPU Core Frequency */
#define icl__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler 1 :: GPU Busy */
#define icl__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler 1 :: VS Threads Dispatched */
#define icl__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler 1 :: HS Threads Dispatched */
#define icl__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler 1 :: DS Threads Dispatched */
#define icl__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler 1 :: GS Threads Dispatched */
#define icl__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler 1 :: FS Threads Dispatched */
#define icl__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler 1 :: CS Threads Dispatched */
#define icl__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler 1 :: EU Active */
#define icl__sampler_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler 1 :: EU Stall */
#define icl__sampler_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler 1 :: EU Both FPU Pipes Active */
#define icl__sampler_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler 1 :: VS FPU0 Pipe Active */
#define icl__sampler_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler 1 :: VS FPU1 Pipe Active */
#define icl__sampler_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler 1 :: VS Send Pipe Active */
#define icl__sampler_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler 1 :: PS FPU0 Pipe Active */
#define icl__sampler_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler 1 :: PS FPU1 Pipe Active */
#define icl__sampler_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler 1 :: PS Send Pipeline Active */
#define icl__sampler_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler 1 :: FS Both FPU Active */
#define icl__sampler_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler 1 :: Rasterized Pixels */
#define icl__sampler_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler 1 :: Early Hi-Depth Test Fails */
#define icl__sampler_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler 1 :: Early Depth Test Fails */
#define icl__sampler_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler 1 :: Samples Killed in FS */
#define icl__sampler_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler 1 :: Pixels Failing Tests */
#define icl__sampler_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler 1 :: Samples Written */
#define icl__sampler_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler 1 :: Samples Blended */
#define icl__sampler_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler 1 :: Sampler Texels */
#define icl__sampler_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler 1 :: Sampler Texels Misses */
#define icl__sampler_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler 1 :: SLM Bytes Read */
#define icl__sampler_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler 1 :: SLM Bytes Written */
#define icl__sampler_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler 1 :: Shader Memory Accesses */
#define icl__sampler_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler 1 :: Shader Atomic Memory Accesses */
#define icl__sampler_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler 1 :: L3 Shader Throughput */
#define icl__sampler_1__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set Sampler 1 :: Shader Barrier Messages */
#define icl__sampler_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler 1 :: Slice0 Subslice0 Input Available */
#define icl__sampler_1__sampler00_input_available__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice1 Input Available */
#define icl__sampler_1__sampler01_input_available__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice2 Input Available */
#define icl__sampler_1__sampler02_input_available__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice3 Input Available */
#define icl__sampler_1__sampler03_input_available__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice4 Input Available */
#define icl__sampler_1__sampler04_input_available__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice5 Input Available */
#define icl__sampler_1__sampler05_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set Sampler 1 :: Slice0 Subslice6 Input Available */
#define icl__sampler_1__sampler06_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set Sampler 1 :: Slice0 Subslice7 Input Available */
#define icl__sampler_1__sampler07_input_available__read \
   bdw__render_basic__sampler0_busy__read

/*
 * ICL "Sampler 2" metric set: counter accessor aliases.
 *
 * Every macro in this run maps an icl__sampler_2__<counter>__read (or __max)
 * name onto an accessor that carries the name of another platform / metric
 * set; no new code is defined here.
 *
 * NOTE(review): several targets are named after a different counter than the
 * alias (e.g. gs_threads -> hsw__render_basic__vs_threads__read,
 * sampler00_output_ready -> bdw__render_pipe_profile__vf_bottleneck__read).
 * Presumably the generator reuses the first accessor that has an identical
 * equation, so the target's name says nothing about this counter -- confirm
 * against gen_perf.py.
 */

/* Metric set Sampler 2 :: GPU Time Elapsed */
#define icl__sampler_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set Sampler 2 :: GPU Core Clocks */
#define icl__sampler_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set Sampler 2 :: AVG GPU Core Frequency */
#define icl__sampler_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set Sampler 2 :: AVG GPU Core Frequency */
#define icl__sampler_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set Sampler 2 :: GPU Busy */
#define icl__sampler_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set Sampler 2 :: VS Threads Dispatched */
#define icl__sampler_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set Sampler 2 :: HS Threads Dispatched */
#define icl__sampler_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set Sampler 2 :: DS Threads Dispatched */
#define icl__sampler_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set Sampler 2 :: GS Threads Dispatched */
#define icl__sampler_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set Sampler 2 :: FS Threads Dispatched */
#define icl__sampler_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set Sampler 2 :: CS Threads Dispatched */
#define icl__sampler_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set Sampler 2 :: EU Active */
#define icl__sampler_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set Sampler 2 :: EU Stall */
#define icl__sampler_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set Sampler 2 :: EU Both FPU Pipes Active */
#define icl__sampler_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set Sampler 2 :: VS FPU0 Pipe Active */
#define icl__sampler_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set Sampler 2 :: VS FPU1 Pipe Active */
#define icl__sampler_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set Sampler 2 :: VS Send Pipe Active */
#define icl__sampler_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set Sampler 2 :: PS FPU0 Pipe Active */
#define icl__sampler_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set Sampler 2 :: PS FPU1 Pipe Active */
#define icl__sampler_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set Sampler 2 :: PS Send Pipeline Active */
#define icl__sampler_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set Sampler 2 :: FS Both FPU Active */
#define icl__sampler_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set Sampler 2 :: Rasterized Pixels */
#define icl__sampler_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set Sampler 2 :: Early Hi-Depth Test Fails */
#define icl__sampler_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set Sampler 2 :: Early Depth Test Fails */
#define icl__sampler_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set Sampler 2 :: Samples Killed in FS */
#define icl__sampler_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set Sampler 2 :: Pixels Failing Tests */
#define icl__sampler_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set Sampler 2 :: Samples Written */
#define icl__sampler_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set Sampler 2 :: Samples Blended */
#define icl__sampler_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set Sampler 2 :: Sampler Texels */
#define icl__sampler_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set Sampler 2 :: Sampler Texels Misses */
#define icl__sampler_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set Sampler 2 :: SLM Bytes Read */
#define icl__sampler_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set Sampler 2 :: SLM Bytes Written */
#define icl__sampler_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set Sampler 2 :: Shader Memory Accesses */
#define icl__sampler_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set Sampler 2 :: Shader Atomic Memory Accesses */
#define icl__sampler_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set Sampler 2 :: L3 Shader Throughput */
#define icl__sampler_2__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set Sampler 2 :: Shader Barrier Messages */
#define icl__sampler_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set Sampler 2 :: Slice0 Subslice0 Sampler Output Ready */
#define icl__sampler_2__sampler00_output_ready__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set Sampler 2 :: Slice0 Subslice1 Sampler Output Ready */
#define icl__sampler_2__sampler01_output_ready__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set Sampler 2 :: Slice0 Subslice2 Sampler Output Ready */
#define icl__sampler_2__sampler02_output_ready__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set Sampler 2 :: Slice0 Subslice3 Sampler Output Ready */
#define icl__sampler_2__sampler03_output_ready__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set Sampler 2 :: Slice0 Subslice4 Sampler Output Ready */
#define icl__sampler_2__sampler04_output_ready__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set Sampler 2 :: Slice0 Subslice5 Sampler Output Ready */
#define icl__sampler_2__sampler05_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set Sampler 2 :: Slice0 Subslice6 Sampler Output Ready */
#define icl__sampler_2__sampler06_output_ready__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set Sampler 2 :: Slice0 Subslice7 Sampler Output Ready */
#define icl__sampler_2__sampler07_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set Sampler 2 :: SQ is full */
#define icl__sampler_2__gt_request_queue_full__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set TDL_1: read-callback aliases.
 *
 * Each macro below makes an icl__tdl_1__* reader name expand to a reader
 * function defined for another metric set. The target name describes the
 * metric it was originally emitted for, so it need not match the metric named
 * in the comment (presumably the two share the same equation -- confirm in
 * the generator).
 */

/* Metric set TDL_1 :: GPU Time Elapsed */
#define icl__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_1 :: GPU Core Clocks */
#define icl__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_1 :: AVG GPU Core Frequency */
#define icl__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_1 :: AVG GPU Core Frequency (__max variant) */
#define icl__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_1 :: GPU Busy */
#define icl__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_1 :: VS Threads Dispatched */
#define icl__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_1 :: HS Threads Dispatched */
#define icl__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_1 :: DS Threads Dispatched */
#define icl__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_1 :: GS Threads Dispatched */
#define icl__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_1 :: FS Threads Dispatched */
#define icl__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_1 :: CS Threads Dispatched */
#define icl__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_1 :: EU Active */
#define icl__tdl_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_1 :: EU Stall */
#define icl__tdl_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_1 :: EU Both FPU Pipes Active */
#define icl__tdl_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_1 :: VS FPU0 Pipe Active */
#define icl__tdl_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_1 :: VS FPU1 Pipe Active */
#define icl__tdl_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_1 :: VS Send Pipe Active */
#define icl__tdl_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_1 :: VS EU Active */
#define icl__tdl_1__vs_eu_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Metric set TDL_1 :: VS AVG Active per Thread
 *
 * Divides accumulator A[13] by the value of the VS Threads Dispatched reader;
 * yields 0 when that reader returns 0.
 */
static uint64_t
icl__tdl_1__vs_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: A 13 READ $VsThreads UDIV */
   const uint64_t active = results->accumulator[query->a_offset + 13];
   const uint64_t vs_threads = icl__tdl_1__vs_threads__read(perf, query, results);

   /* Guard the unsigned division against a zero thread count. */
   if (vs_threads == 0)
      return 0;

   return active / vs_threads;
}

/* Metric set TDL_1 :: VS EU Stall
 *
 * Alias only: the reader name expands to a function emitted for the
 * compute_l3_cache set (presumably an identical equation -- confirm in the
 * generator).
 */
#define icl__tdl_1__vs_eu_stall__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Metric set TDL_1 :: VS AVG Stall per Thread
 *
 * Divides accumulator A[14] by the value of the VS Threads Dispatched reader;
 * yields 0 when that reader returns 0.
 */
static uint64_t
icl__tdl_1__vs_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 14 READ $VsThreads UDIV */
   const uint64_t stall = results->accumulator[query->a_offset + 14];
   const uint64_t vs_threads = icl__tdl_1__vs_threads__read(perf, query, results);

   /* Guard the unsigned division against a zero thread count. */
   if (vs_threads == 0)
      return 0;

   return stall / vs_threads;
}

/* Metric set TDL_1: PS/FS read-callback aliases.
 *
 * Each macro below makes an icl__tdl_1__* reader name expand to a reader
 * function defined for another metric set; the target name need not match
 * the metric named in the comment.
 */

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define icl__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define icl__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define icl__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define icl__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: FS EU Active */
#define icl__tdl_1__ps_eu_active__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Metric set TDL_1 :: FS AVG Active per Thread
 *
 * Divides accumulator A[19] by the value of the FS Threads Dispatched reader;
 * yields 0 when that reader returns 0.
 */
static uint64_t
icl__tdl_1__ps_eu_active_per_thread__read(UNUSED struct intel_perf_config *perf,
                                          const struct intel_perf_query_info *query,
                                          const struct intel_perf_query_result *results)
{
   /* RPN equation: A 19 READ $PsThreads UDIV */
   const uint64_t active = results->accumulator[query->a_offset + 19];
   const uint64_t ps_threads = icl__tdl_1__ps_threads__read(perf, query, results);

   /* Guard the unsigned division against a zero thread count. */
   if (ps_threads == 0)
      return 0;

   return active / ps_threads;
}

/* Metric set TDL_1 :: FS EU Stall
 *
 * Alias only: the reader name expands to a function emitted for the
 * compute_l3_cache set (presumably an identical equation -- confirm in the
 * generator).
 */
#define icl__tdl_1__ps_eu_stall__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Metric set TDL_1 :: FS AVG Stall per Thread
 *
 * Divides accumulator A[20] by the value of the FS Threads Dispatched reader;
 * yields 0 when that reader returns 0.
 */
static uint64_t
icl__tdl_1__ps_eu_stall_per_thread__read(UNUSED struct intel_perf_config *perf,
                                         const struct intel_perf_query_info *query,
                                         const struct intel_perf_query_result *results)
{
   /* RPN equation: A 20 READ $PsThreads UDIV */
   const uint64_t stall = results->accumulator[query->a_offset + 20];
   const uint64_t ps_threads = icl__tdl_1__ps_threads__read(perf, query, results);

   /* Guard the unsigned division against a zero thread count. */
   if (ps_threads == 0)
      return 0;

   return stall / ps_threads;
}

/* Metric set TDL_1: remaining read-callback aliases.
 *
 * Each macro below makes an icl__tdl_1__* reader name expand to a reader
 * function defined for another metric set. The target name describes the
 * metric it was originally emitted for, so it need not match the metric named
 * in the comment (presumably the two share the same equation -- confirm in
 * the generator).
 */

/* Metric set TDL_1 :: Rasterized Pixels */
#define icl__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define icl__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define icl__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define icl__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define icl__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define icl__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define icl__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define icl__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define icl__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define icl__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define icl__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define icl__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define icl__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define icl__tdl_1__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define icl__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher */
#define icl__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher */
#define icl__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher */
#define icl__tdl_1__ps_thread02_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher */
#define icl__tdl_1__ps_thread03_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher */
#define icl__tdl_1__ps_thread04_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher */
#define icl__tdl_1__ps_thread05_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher */
#define icl__tdl_1__ps_thread06_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher */
#define icl__tdl_1__ps_thread07_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_2: read-callback aliases.
 *
 * Each macro below makes an icl__tdl_2__* reader name expand to a reader
 * function defined for another metric set. The target name describes the
 * metric it was originally emitted for, so it need not match the metric named
 * in the comment (presumably the two share the same equation -- confirm in
 * the generator).
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define icl__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define icl__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define icl__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency (__max variant) */
#define icl__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define icl__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define icl__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define icl__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define icl__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define icl__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define icl__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define icl__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define icl__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define icl__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define icl__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define icl__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define icl__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define icl__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define icl__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define icl__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define icl__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define icl__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define icl__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define icl__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define icl__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define icl__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define icl__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define icl__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define icl__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define icl__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define icl__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define icl__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define icl__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define icl__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define icl__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define icl__tdl_2__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define icl__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - B[7]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread00_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ B 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->b_offset + 7];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[0]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread01_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 0 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 0];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[1]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread02_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 1 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 1];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[2]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread03_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 2 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 2];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[3]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread04_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 3 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 3];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[4]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread05_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 4 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 4];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[5]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread06_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 5 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 5];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher
 *
 * Returns (GPU_CLOCK[0] - C[6]) * 100 / GpuCoreClocks, or 0 when the
 * GpuCoreClocks reader returns 0.
 */
static float
icl__tdl_2__non_ps_thread07_ready_for_dispatch__read(UNUSED struct intel_perf_config *perf,
                                                     const struct intel_perf_query_info *query,
                                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 6 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t counter = results->accumulator[query->c_offset + 6];

   /* Both operands are unsigned: if the counter exceeds the clock count the
    * subtraction would wrap to a value near 2^64. Clamp the difference to
    * zero instead.
    */
   const double delta = gpu_clock > counter ? (double)(gpu_clock - counter) : 0.0;

   /* Keep the scaled value in floating point: converting a double that does
    * not fit in uint64_t back to an integer is undefined behaviour, and for
    * in-range values the round trip changes nothing.
    */
   const double scaled = delta * 100;
   const double core_clocks = icl__tdl_2__gpu_core_clocks__read(perf, query, results);

   return core_clocks ? scaled / core_clocks : 0;
}

/* Metric set TDL_2 :: SQ is full
 *
 * Alias only: the reader name expands to a function emitted for the
 * render_pipe_profile set (presumably an identical equation -- confirm in
 * the generator).
 */
#define icl__tdl_2__gt_request_queue_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set TDL_3: read-callback aliases.
 *
 * Each macro below makes an icl__tdl_3__* reader name expand to a reader
 * function defined for another metric set. The target name describes the
 * metric it was originally emitted for, so it need not match the metric named
 * in the comment (presumably the two share the same equation -- confirm in
 * the generator).
 */

/* Metric set TDL_3 :: GPU Time Elapsed */
#define icl__tdl_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_3 :: GPU Core Clocks */
#define icl__tdl_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_3 :: AVG GPU Core Frequency */
#define icl__tdl_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_3 :: AVG GPU Core Frequency (__max variant) */
#define icl__tdl_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_3 :: GPU Busy */
#define icl__tdl_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_3 :: VS Threads Dispatched */
#define icl__tdl_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_3 :: HS Threads Dispatched */
#define icl__tdl_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_3 :: DS Threads Dispatched */
#define icl__tdl_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_3 :: GS Threads Dispatched */
#define icl__tdl_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_3 :: FS Threads Dispatched */
#define icl__tdl_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_3 :: CS Threads Dispatched */
#define icl__tdl_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_3 :: EU Active */
#define icl__tdl_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_3 :: EU Stall */
#define icl__tdl_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_3 :: EU Both FPU Pipes Active */
#define icl__tdl_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_3 :: VS FPU0 Pipe Active */
#define icl__tdl_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_3 :: VS FPU1 Pipe Active */
#define icl__tdl_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_3 :: VS Send Pipe Active */
#define icl__tdl_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_3 :: PS FPU0 Pipe Active */
#define icl__tdl_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_3 :: PS FPU1 Pipe Active */
#define icl__tdl_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_3 :: PS Send Pipeline Active */
#define icl__tdl_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_3 :: FS Both FPU Active */
#define icl__tdl_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_3 :: Rasterized Pixels */
#define icl__tdl_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_3 :: Early Hi-Depth Test Fails */
#define icl__tdl_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_3 :: Early Depth Test Fails */
#define icl__tdl_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_3 :: Samples Killed in FS */
#define icl__tdl_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_3 :: Pixels Failing Tests */
#define icl__tdl_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_3 :: Samples Written */
#define icl__tdl_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_3 :: Samples Blended */
#define icl__tdl_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_3 :: Sampler Texels */
#define icl__tdl_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_3 :: Sampler Texels Misses */
#define icl__tdl_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_3 :: SLM Bytes Read */
#define icl__tdl_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_3 :: SLM Bytes Written */
#define icl__tdl_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_3 :: Shader Memory Accesses */
#define icl__tdl_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_3 :: Shader Atomic Memory Accesses */
#define icl__tdl_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_3 :: L3 Shader Throughput */
#define icl__tdl_3__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set TDL_3 :: Shader Barrier Messages */
#define icl__tdl_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header00_ready_port0__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header00_ready_port1__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header01_ready_port0__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header01_ready_port1__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header03_ready_port0__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header03_ready_port1__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header04_ready_port0__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header04_ready_port1__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header05_ready_port0__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header05_ready_port1__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header06_ready_port0__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header06_ready_port1__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0 */
#define icl__tdl_3__thread_header07_ready_port0__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1 */
#define icl__tdl_3__thread_header07_ready_port1__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/*
 * icl__gpu_busyness__* reader aliases ("Gpu Rings Busyness" metric set).
 *
 * Every entry is a macro that expands to another reader identifier; no
 * function is defined in this group.  The target name describes a different
 * metric than the one aliased (e.g. render_busy expands to
 * bdw__render_pipe_profile__bc_bottleneck__read).
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* Gpu Rings Busyness :: GPU Time Elapsed */
#define icl__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness :: GPU Core Clocks */
#define icl__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency */
#define icl__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency (__max alias) */
#define icl__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness :: GPU Busy */
#define icl__gpu_busyness__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Gpu Rings Busyness :: Render Ring Busy */
#define icl__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness :: Vdbox0 Ring Busy */
#define icl__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness :: Vdbox1 Ring Busy */
#define icl__gpu_busyness__vdbox1_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness :: Vebox Ring Busy */
#define icl__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness :: Blitter Ring Busy */
#define icl__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__so_stall__read

/* Gpu Rings Busyness :: Posh Ring Busy */
#define icl__gpu_busyness__posh_engine_busy__read \
   bdw__render_pipe_profile__ds_stall__read

/* Gpu Rings Busyness :: AnyRingBusy */
#define icl__gpu_busyness__any_ring_busy__read \
   bdw__render_basic__sampler0_busy__read

/*
 * icl__test_oa__* reader aliases ("TestOa" metric set).
 *
 * Every entry is a macro that expands to another reader identifier; no
 * function is defined in this group.  TestCounter0..8 expand to
 * hsw__compute_extended__* readers whose names describe unrelated metrics.
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* Metric set TestOa :: GPU Time Elapsed */
#define icl__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TestOa :: GPU Core Clocks */
#define icl__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TestOa :: AVG GPU Core Frequency */
#define icl__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TestOa :: AVG GPU Core Frequency (__max alias) */
#define icl__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TestOa :: TestCounter0 */
#define icl__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Metric set TestOa :: TestCounter1 */
#define icl__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Metric set TestOa :: TestCounter2 */
#define icl__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Metric set TestOa :: TestCounter3 */
#define icl__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Metric set TestOa :: TestCounter4 */
#define icl__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Metric set TestOa :: TestCounter5 */
#define icl__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Metric set TestOa :: TestCounter6 */
#define icl__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Metric set TestOa :: TestCounter7 */
#define icl__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* Metric set TestOa :: TestCounter8 */
#define icl__test_oa__counter8__read \
   hsw__compute_extended__typed_writes0__read

/*
 * icl__async_compute__* reader aliases ("AsyncCompute" metric set).
 *
 * Every entry is a macro that expands to another reader identifier; no
 * function is defined in this group.  Several target names describe a
 * different metric than the one aliased (e.g. ps_fpu0_active expands to
 * bdw__render_basic__vs_fpu1_active__read), so rely on the macro name and
 * the comment above it, not on the target name.
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* AsyncCompute :: GPU Time Elapsed */
#define icl__async_compute__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* AsyncCompute :: GPU Core Clocks */
#define icl__async_compute__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* AsyncCompute :: AVG GPU Core Frequency */
#define icl__async_compute__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* AsyncCompute :: AVG GPU Core Frequency (__max alias) */
#define icl__async_compute__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* AsyncCompute :: GPU Busy */
#define icl__async_compute__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* AsyncCompute :: VS Threads Dispatched */
#define icl__async_compute__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* AsyncCompute :: HS Threads Dispatched */
#define icl__async_compute__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* AsyncCompute :: DS Threads Dispatched */
#define icl__async_compute__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* AsyncCompute :: GS Threads Dispatched */
#define icl__async_compute__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* AsyncCompute :: FS Threads Dispatched */
#define icl__async_compute__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* AsyncCompute :: CS Threads Dispatched */
#define icl__async_compute__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* AsyncCompute :: EU FPU0 Pipe Active */
#define icl__async_compute__fpu0_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* AsyncCompute :: VS FPU0 Pipe Active */
#define icl__async_compute__vs_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* AsyncCompute :: PS FPU0 Pipe Active */
#define icl__async_compute__ps_fpu0_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* AsyncCompute :: CS FPU0 Pipe Active */
#define icl__async_compute__cs_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* AsyncCompute :: EU FPU1 Pipe Active */
#define icl__async_compute__fpu1_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* AsyncCompute :: VS FPU1 Pipe Active */
#define icl__async_compute__vs_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* AsyncCompute :: PS FPU1 Pipe Active */
#define icl__async_compute__ps_fpu1_active__read \
   bdw__render_basic__vs_send_active__read

/* AsyncCompute :: CS FPU1 Pipe Active */
#define icl__async_compute__cs_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* AsyncCompute :: EU Thread Occupancy */
#define icl__async_compute__eu_thread_occupancy__read \
   sklgt2__ff_bottlenecks__eu_thread_occupancy__read

/* AsyncCompute :: EU Active */
#define icl__async_compute__eu_active__read \
   bdw__render_basic__eu_active__read

/* AsyncCompute :: EU Stall */
#define icl__async_compute__eu_stall__read \
   bdw__render_basic__eu_stall__read

/*
 * ehl__render_basic__* reader aliases ("Render Metrics Basic set").
 *
 * Every entry in this run is a macro that expands to another reader
 * identifier; no function is defined here.  Some target names describe a
 * different metric than the one aliased (e.g. ps_send_active expands to
 * bdw__render_basic__ps_fpu0_active__read), and some aliases share a target
 * (sampler00_busy and samplers_busy both expand to
 * bdw__render_pipe_profile__hs_stall__read).
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* Render Metrics Basic set :: GPU Time Elapsed */
#define ehl__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define ehl__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define ehl__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (__max alias) */
#define ehl__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define ehl__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define ehl__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define ehl__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define ehl__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define ehl__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define ehl__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define ehl__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define ehl__render_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define ehl__render_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Both FPU Pipes Active */
#define ehl__render_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Render Metrics Basic set :: VS FPU0 Pipe Active */
#define ehl__render_basic__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Render Metrics Basic set :: VS FPU1 Pipe Active */
#define ehl__render_basic__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Render Metrics Basic set :: VS Send Pipe Active */
#define ehl__render_basic__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Render Metrics Basic set :: PS FPU0 Pipe Active */
#define ehl__render_basic__ps_fpu0_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Render Metrics Basic set :: PS FPU1 Pipe Active */
#define ehl__render_basic__ps_fpu1_active__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Render Metrics Basic set :: PS Send Pipeline Active */
#define ehl__render_basic__ps_send_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Render Metrics Basic set :: Sampler00 Busy */
#define ehl__render_basic__sampler00_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics Basic set :: Samplers Busy */
#define ehl__render_basic__samplers_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics Basic set :: Sampler00 Bottleneck */
#define ehl__render_basic__sampler00_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define ehl__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define ehl__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define ehl__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define ehl__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define ehl__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define ehl__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define ehl__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define ehl__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define ehl__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: Sampler Cache Misses
 *
 * Sums accumulator entries 4 and 5 relative to query->c_offset and scales
 * the sum by 8.  `perf` is not used.
 */
static uint64_t
ehl__render_basic__sampler_l1_misses__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ C 5 READ UADD 8 UMUL
    *
    * Infix form: (C4 + C5) * 8, in unsigned 64-bit arithmetic.
    */
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];

   return (c4 + c5) * 8;
}

/*
 * ehl__render_basic__* reader aliases for the SLM and shader-memory
 * counters.  Each macro expands to the bdw__render_basic__* reader of the
 * same metric name; no function is defined here.
 */
/* Render Metrics Basic set :: SLM Bytes Read */
#define ehl__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define ehl__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define ehl__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define ehl__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Shader Throughput
 *
 * Sums accumulator entries 30, 31 and 32 relative to query->a_offset and
 * scales the sum by 64.  `perf` is not used.
 */
static uint64_t
ehl__render_basic__l3_shader_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: A 30 READ A 31 READ UADD A 32 READ UADD 64 UMUL
    *
    * Infix form: (A30 + A31 + A32) * 64, in unsigned 64-bit arithmetic.
    */
   const uint64_t a30 = results->accumulator[query->a_offset + 30];
   const uint64_t a31 = results->accumulator[query->a_offset + 31];
   const uint64_t a32 = results->accumulator[query->a_offset + 32];

   return (a30 + a31 + a32) * 64;
}

/* Render Metrics Basic set :: Shader Barrier Messages
 *
 * Macro alias only; the target's name describes a different metric
 * (early depth test fails) than the one aliased here.
 */
#define ehl__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Read Throughput
 *
 * Sums accumulator entries 5 and 4 relative to query->b_offset and scales
 * the sum by 64.  `perf` is not used.
 */
static uint64_t
ehl__render_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                             const struct intel_perf_query_info *query,
                                             const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 5 READ B 4 READ UADD UMUL
    *
    * Infix form: 64 * (B5 + B4), in unsigned 64-bit arithmetic.
    */
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   return 64 * (b5 + b4);
}

/*
 * Remaining ehl__render_basic__* reader aliases.  Both macros expand to
 * another reader identifier; no function is defined here.
 */
/* Render Metrics Basic set :: GTI Write Throughput */
#define ehl__render_basic__gti_write_throughput__read \
   icl__render_basic__gti_write_throughput__read

/* Render Metrics Basic set :: Samplers Bottleneck
 *
 * The target's name describes a different metric (VF bottleneck) than the
 * one aliased here.
 */
#define ehl__render_basic__sampler_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/*
 * ehl__compute_basic__* reader aliases ("Compute Metrics Basic set").
 *
 * Every entry in this run is a macro that expands to another reader
 * identifier; no function is defined here.  Some target names describe a
 * different metric than the one aliased (e.g. shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read).
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* Compute Metrics Basic set :: GPU Time Elapsed */
#define ehl__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define ehl__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define ehl__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (__max alias) */
#define ehl__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define ehl__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define ehl__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define ehl__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define ehl__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define ehl__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define ehl__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define ehl__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define ehl__compute_basic__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define ehl__compute_basic__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU AVG IPC Rate */
#define ehl__compute_basic__eu_avg_ipc_rate__read \
   icl__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics Basic set :: EU Both FPU Pipes Active */
#define ehl__compute_basic__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics Basic set :: EU FPU0 Pipe Active */
#define ehl__compute_basic__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics Basic set :: EU FPU1 Pipe Active */
#define ehl__compute_basic__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics Basic set :: EU Send Pipe Active */
#define ehl__compute_basic__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define ehl__compute_basic__eu_thread_occupancy__read \
   icl__compute_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define ehl__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define ehl__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define ehl__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define ehl__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define ehl__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define ehl__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define ehl__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define ehl__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define ehl__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define ehl__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define ehl__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define ehl__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define ehl__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define ehl__compute_basic__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define ehl__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Typed Bytes Read */
#define ehl__compute_basic__typed_bytes_read__read \
   icl__compute_basic__typed_bytes_read__read

/* Compute Metrics Basic set :: Typed Bytes Written */
#define ehl__compute_basic__typed_bytes_written__read \
   icl__compute_basic__typed_bytes_written__read

/* Compute Metrics Basic set :: Untyped Bytes Read */
#define ehl__compute_basic__untyped_bytes_read__read \
   icl__compute_basic__untyped_bytes_read__read

/* Compute Metrics Basic set :: Untyped Writes */
#define ehl__compute_basic__untyped_bytes_written__read \
   icl__compute_basic__untyped_bytes_written__read

/* Compute Metrics Basic set :: Typed Atomics Accesses */
#define ehl__compute_basic__typed_atomics__read \
   icl__compute_basic__typed_atomics__read

/* Compute Metrics Basic set :: GTI Read Throughput
 *
 * Sums accumulator entries 7 and 6 relative to query->b_offset and scales
 * the sum by 64.  `perf` is not used.
 */
static uint64_t
ehl__compute_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 7 READ B 6 READ UADD UMUL
    *
    * Infix form: 64 * (B7 + B6), in unsigned 64-bit arithmetic.
    */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];

   return 64 * (b7 + b6);
}

/* Compute Metrics Basic set :: GTI Write Throughput
 *
 * Macro alias only; expands to the icl__render_basic__* reader of the same
 * metric name.
 */
#define ehl__compute_basic__gti_write_throughput__read \
   icl__render_basic__gti_write_throughput__read

/*
 * ehl__compute_extended__* reader aliases ("ComputeExtended metrics set").
 *
 * Every entry is a macro that expands to another reader identifier; no
 * function is defined in this group.  Many target names describe a different
 * metric than the one aliased (e.g. typed_atomics00 expands to
 * hsw__memory_reads__gpu_core_clocks__read), so rely on the macro name and
 * the comment above it, not on the target name.
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* ComputeExtended metrics set :: GPU Time Elapsed */
#define ehl__compute_extended__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* ComputeExtended metrics set :: GPU Core Clocks */
#define ehl__compute_extended__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* ComputeExtended metrics set :: AVG GPU Core Frequency */
#define ehl__compute_extended__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* ComputeExtended metrics set :: AVG GPU Core Frequency (__max alias) */
#define ehl__compute_extended__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* ComputeExtended metrics set :: GPU Busy */
#define ehl__compute_extended__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* ComputeExtended metrics set :: CS Threads Dispatched */
#define ehl__compute_extended__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* ComputeExtended metrics set :: Typed Atomics 00 */
#define ehl__compute_extended__typed_atomics00__read \
   hsw__memory_reads__gpu_core_clocks__read

/* ComputeExtended metrics set :: Typed Reads 00 */
#define ehl__compute_extended__typed_reads00__read \
   hsw__memory_reads__llc_read_accesses__read

/* ComputeExtended metrics set :: Typed Writes 00 */
#define ehl__compute_extended__typed_writes00__read \
   hsw__memory_reads__gti_memory_reads__read

/* ComputeExtended metrics set :: Untyped Reads 00 */
#define ehl__compute_extended__untyped_reads00__read \
   hsw__compute_extended__typed_atomics0__read

/* ComputeExtended metrics set :: Untyped Writes 00 */
#define ehl__compute_extended__untyped_writes00__read \
   hsw__compute_extended__untyped_reads0__read

/* ComputeExtended metrics set :: Eu Typed Reads 00 */
#define ehl__compute_extended__eu_typed_reads00__read \
   hsw__compute_extended__eu_typed_writes0__read

/* ComputeExtended metrics set :: Eu Typed Writes 00 */
#define ehl__compute_extended__eu_typed_writes00__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* ComputeExtended metrics set :: Eu Typed Atomics 00 */
#define ehl__compute_extended__eu_typed_atomics00__read \
   hsw__compute_extended__eu_typed_reads0__read

/* ComputeExtended metrics set :: Eu A32 Untyped Reads 00 */
#define ehl__compute_extended__eu_a32_untyped_reads00__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* ComputeExtended metrics set :: Eu A32 Untyped Writes 00 */
#define ehl__compute_extended__eu_a32_untyped_writes00__read \
   icl__compute_extended__eu_a32_untyped_writes00__read

/* ComputeExtended metrics set :: Eu A64 Untyped Reads 00 */
#define ehl__compute_extended__eu_a64_untyped_reads00__read \
   icl__compute_extended__eu_a64_untyped_reads00__read

/* ComputeExtended metrics set :: Eu A64 Untyped Writes 00 */
#define ehl__compute_extended__eu_a64_untyped_writes00__read \
   hsw__render_basic__gpu_core_clocks__read

/* ComputeExtended metrics set :: TypedAtomicsPerCacheLine */
#define ehl__compute_extended__typed_atomics_per_cache_line__read \
   icl__compute_extended__typed_atomics_per_cache_line__read

/* ComputeExtended metrics set :: TypedReadsPerCacheLine */
#define ehl__compute_extended__typed_reads_per_cache_line__read \
   icl__compute_extended__typed_reads_per_cache_line__read

/* ComputeExtended metrics set :: TypedWritesPerCacheLine */
#define ehl__compute_extended__typed_writes_per_cache_line__read \
   icl__compute_extended__typed_writes_per_cache_line__read

/* ComputeExtended metrics set :: UntypedReadsPerCacheLine */
#define ehl__compute_extended__untyped_reads_per_cache_line__read \
   icl__compute_extended__untyped_reads_per_cache_line__read

/* ComputeExtended metrics set :: UntypedWritesPerCacheLine */
#define ehl__compute_extended__untyped_writes_per_cache_line__read \
   icl__compute_extended__untyped_writes_per_cache_line__read

/*
 * ehl__compute_l3_cache__* reader aliases ("Compute Metrics L3 Cache set").
 *
 * Every entry in this run is a macro that expands to another reader
 * identifier; no function is defined here.  Some target names describe a
 * different metric than the one aliased (e.g. l3_bank07_accesses expands to
 * bdw__compute_l3_cache__l3_bank00_accesses__read), so rely on the macro
 * name and the comment above it, not on the target name.
 * NOTE(review): presumably gen_perf.py reuses an existing reader whenever the
 * equation is the same -- confirm against the generator.
 */
/* Compute Metrics L3 Cache set :: GPU Time Elapsed */
#define ehl__compute_l3_cache__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics L3 Cache set :: GPU Core Clocks */
#define ehl__compute_l3_cache__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency */
#define ehl__compute_l3_cache__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics L3 Cache set :: AVG GPU Core Frequency (__max alias) */
#define ehl__compute_l3_cache__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics L3 Cache set :: GPU Busy */
#define ehl__compute_l3_cache__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics L3 Cache set :: VS Threads Dispatched */
#define ehl__compute_l3_cache__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: HS Threads Dispatched */
#define ehl__compute_l3_cache__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics L3 Cache set :: DS Threads Dispatched */
#define ehl__compute_l3_cache__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics L3 Cache set :: GS Threads Dispatched */
#define ehl__compute_l3_cache__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics L3 Cache set :: FS Threads Dispatched */
#define ehl__compute_l3_cache__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics L3 Cache set :: CS Threads Dispatched */
#define ehl__compute_l3_cache__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics L3 Cache set :: EU Active */
#define ehl__compute_l3_cache__eu_active__read \
   bdw__render_basic__eu_active__read

/* Compute Metrics L3 Cache set :: EU Stall */
#define ehl__compute_l3_cache__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Compute Metrics L3 Cache set :: EU AVG IPC Rate */
#define ehl__compute_l3_cache__eu_avg_ipc_rate__read \
   icl__compute_basic__eu_avg_ipc_rate__read

/* Compute Metrics L3 Cache set :: EU Both FPU Pipes Active */
#define ehl__compute_l3_cache__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Pipe Active */
#define ehl__compute_l3_cache__fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Pipe Active */
#define ehl__compute_l3_cache__fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU Send Pipe Active */
#define ehl__compute_l3_cache__eu_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Hybrid Instruction */
#define ehl__compute_l3_cache__eu_hybrid_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Hybrid Instruction */
#define ehl__compute_l3_cache__eu_hybrid_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU0 Ternary Instruction */
#define ehl__compute_l3_cache__eu_ternary_fpu0_instruction__read \
   bdw__render_basic__ps_fpu0_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Ternary Instruction */
#define ehl__compute_l3_cache__eu_ternary_fpu1_instruction__read \
   bdw__render_basic__ps_fpu1_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Binary Instruction */
#define ehl__compute_l3_cache__eu_binary_fpu0_instruction__read \
   bdw__render_basic__ps_send_active__read

/* Compute Metrics L3 Cache set :: EU FPU1 Binary Instruction */
#define ehl__compute_l3_cache__eu_binary_fpu1_instruction__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Compute Metrics L3 Cache set :: EU FPU0 Move Instruction */
#define ehl__compute_l3_cache__eu_move_fpu0_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Compute Metrics L3 Cache set :: EU FPU1 Move Instruction */
#define ehl__compute_l3_cache__eu_move_fpu1_instruction__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Compute Metrics L3 Cache set :: Rasterized Pixels */
#define ehl__compute_l3_cache__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics L3 Cache set :: Early Hi-Depth Test Fails */
#define ehl__compute_l3_cache__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Early Depth Test Fails */
#define ehl__compute_l3_cache__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Samples Killed in FS */
#define ehl__compute_l3_cache__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics L3 Cache set :: Pixels Failing Tests */
#define ehl__compute_l3_cache__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics L3 Cache set :: Samples Written */
#define ehl__compute_l3_cache__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics L3 Cache set :: Samples Blended */
#define ehl__compute_l3_cache__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics L3 Cache set :: Sampler Accesses */
#define ehl__compute_l3_cache__sampler_accesses__read \
   bdw__render_pipe_profile__sampler_accesses__read

/* Compute Metrics L3 Cache set :: Sampler Texels */
#define ehl__compute_l3_cache__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics L3 Cache set :: Sampler Texels Misses */
#define ehl__compute_l3_cache__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics L3 Cache set :: SLM Bytes Read */
#define ehl__compute_l3_cache__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics L3 Cache set :: SLM Bytes Written */
#define ehl__compute_l3_cache__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics L3 Cache set :: Shader Memory Accesses */
#define ehl__compute_l3_cache__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics L3 Cache set :: Shader Atomic Memory Accesses */
#define ehl__compute_l3_cache__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics L3 Cache set :: Shader Barrier Messages */
#define ehl__compute_l3_cache__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank0 Accesses */
#define ehl__compute_l3_cache__l3_bank00_accesses__read \
   icl__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank1 Accesses */
#define ehl__compute_l3_cache__l3_bank01_accesses__read \
   hsw__memory_writes__llc_wr_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank2 Accesses */
#define ehl__compute_l3_cache__l3_bank02_accesses__read \
   icl__compute_l3_cache__l3_bank02_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank3 Accesses */
#define ehl__compute_l3_cache__l3_bank03_accesses__read \
   icl__compute_l3_cache__l3_bank03_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank4 Accesses */
#define ehl__compute_l3_cache__l3_bank04_accesses__read \
   bdw__memory_reads__gti_ring_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank5 Accesses */
#define ehl__compute_l3_cache__l3_bank05_accesses__read \
   bdw__compute_l3_cache__l3_bank10_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank6 Accesses */
#define ehl__compute_l3_cache__l3_bank06_accesses__read \
   bdw__compute_l3_cache__l3_bank01_accesses__read

/* Compute Metrics L3 Cache set :: Slice0 L3 Bank7 Accesses */
#define ehl__compute_l3_cache__l3_bank07_accesses__read \
   bdw__compute_l3_cache__l3_bank00_accesses__read

/* Compute Metrics L3 Cache set :: L3 Accesses */
#define ehl__compute_l3_cache__l3_accesses__read \
   icl__compute_l3_cache__l3_accesses__read

/* Compute Metrics L3 Cache set :: L3 Lookup Accesses w/o IC
 *
 * Returns (B7 + B6) * 8 + A32, where Bn is results->accumulator at index
 * query->b_offset + n and An is results->accumulator at index
 * query->a_offset + n. All arithmetic is done in uint64_t.
 */
static uint64_t
ehl__compute_l3_cache__l3_lookups__read(UNUSED struct intel_perf_config *perf,
                                        const struct intel_perf_query_info *query,
                                        const struct intel_perf_query_result *results)
{
   /* RPN equation: B 7 READ B 6 READ UADD 8 UMUL A 32 READ UADD */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t a32 = results->accumulator[query->a_offset + 32];

   return (b7 + b6) * 8 + a32;
}

/* Compute Metrics L3 Cache set :: L3 Misses
 *
 * Returns 2 * (B5 + B4), where Bn is results->accumulator at index
 * query->b_offset + n. All arithmetic is done in uint64_t.
 */
static uint64_t
ehl__compute_l3_cache__l3_misses__read(UNUSED struct intel_perf_config *perf,
                                       const struct intel_perf_query_info *query,
                                       const struct intel_perf_query_result *results)
{
   /* RPN equation: 2 B 5 READ B 4 READ UADD UMUL */
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   return 2 * (b5 + b4);
}

/* Compute Metrics L3 Cache set :: L3 Sampler Throughput
 *
 * Returns 64 * ((B7 + B6) * 8), where Bn is results->accumulator at index
 * query->b_offset + n. All arithmetic is done in uint64_t.
 */
static uint64_t
ehl__compute_l3_cache__l3_sampler_throughput__read(UNUSED struct intel_perf_config *perf,
                                                   const struct intel_perf_query_info *query,
                                                   const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  B 7 READ B 6 READ UADD 8 UMUL UMUL */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t scaled_sum = (b7 + b6) * 8;

   return 64 * scaled_sum;
}

/* The two counters below have no reader of their own in this set: each name
 * expands to a reader with another prefix (ehl__render_basic__* and
 * icl__compute_l3_cache__* respectively), which must be declared before any
 * use of these macros.
 */

/* Compute Metrics L3 Cache set :: L3 Shader Throughput */
#define ehl__compute_l3_cache__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Compute Metrics L3 Cache set :: L3 Total Throughput */
#define ehl__compute_l3_cache__l3_total_throughput__read \
   icl__compute_l3_cache__l3_total_throughput__read

/* Compute Metrics L3 Cache set :: GTI L3 Throughput
 *
 * Returns the value of the set's L3 Misses reader multiplied by 64. All three
 * arguments are forwarded unchanged to that reader, so perf is passed on here
 * even though the parameter carries the UNUSED marker.
 */
static uint64_t
ehl__compute_l3_cache__gti_l3_throughput__read(UNUSED struct intel_perf_config *perf,
                                               const struct intel_perf_query_info *query,
                                               const struct intel_perf_query_result *results)
{
   /* RPN equation: $L3Misses 64 UMUL */
   const uint64_t l3_misses =
      ehl__compute_l3_cache__l3_misses__read(perf, query, results);

   return l3_misses * 64;
}

/* The GTI read/write throughput counters have no reader of their own in this
 * set: each name expands to the icl__compute_l3_cache__* reader of the same
 * counter name.
 */

/* Compute Metrics L3 Cache set :: GTI Read Throughput */
#define ehl__compute_l3_cache__gti_read_throughput__read \
   icl__compute_l3_cache__gti_read_throughput__read

/* Compute Metrics L3 Cache set :: GTI Write Throughput */
#define ehl__compute_l3_cache__gti_write_throughput__read \
   icl__compute_l3_cache__gti_write_throughput__read

/* EHL "Render Metrics set for 3D Pipeline Profile": every counter in this
 * set is an alias.
 *
 * Each macro makes an ehl__render_pipe_profile__* name expand to a reader
 * (or, for avg_gpu_core_frequency, also a __max function) that carries a
 * different prefix and/or metric-set name; no new code is emitted here.
 *
 * NOTE(review): many targets are named after a different counter than the
 * one being defined (see the bottleneck/stall aliases further down).
 * Presumably gen_perf.py reuses the first reader it generated with an
 * identical equation, so the target name says nothing about the counter's
 * meaning -- confirm against the generator before "fixing" such an alias.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define ehl__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define ehl__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define ehl__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency
 * (alias for the counter's __max function, not its reader)
 */
#define ehl__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define ehl__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define ehl__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define ehl__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define ehl__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define ehl__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define ehl__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define ehl__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define ehl__render_pipe_profile__eu_active__read \
   bdw__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define ehl__render_pipe_profile__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define ehl__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define ehl__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define ehl__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define ehl__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define ehl__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define ehl__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define ehl__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define ehl__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define ehl__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define ehl__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define ehl__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define ehl__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define ehl__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define ehl__render_pipe_profile__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define ehl__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* From here to the end of the set, each bottleneck/stall counter expands to
 * a bdw__ reader named after a different stage or unit (for example
 * vf_bottleneck -> bdw ...bc_bottleneck, ds_stall -> bdw ...sampler1_bottleneck).
 * The pairing is as generated; do not assume it follows the names.
 */

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define ehl__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define ehl__render_pipe_profile__vs_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define ehl__render_pipe_profile__hs_bottleneck__read \
   bdw__render_pipe_profile__sf_stall__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define ehl__render_pipe_profile__ds_bottleneck__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define ehl__render_pipe_profile__gs_bottleneck__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define ehl__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define ehl__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define ehl__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define ehl__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Bottleneck */
#define ehl__render_pipe_profile__early_depth_bottleneck__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define ehl__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define ehl__render_pipe_profile__hs_stall__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define ehl__render_pipe_profile__ds_stall__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define ehl__render_pipe_profile__so_stall__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define ehl__render_pipe_profile__cl_stall__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define ehl__render_pipe_profile__sf_stall__read \
   bdw__render_basic__sampler0_busy__read

/* EHL "HDCAndSF" metric set: every counter in this set is an alias.
 *
 * Each macro makes an ehl__hdc_and_sf__* name expand to a reader (or, for
 * avg_gpu_core_frequency, also a __max function) that carries a different
 * prefix and/or metric-set name; no new code is emitted here.
 *
 * NOTE(review): some targets are named after a different counter than the
 * one being defined (e.g. poly_data_ready -> bdw ...sampler0_busy).
 * Presumably gen_perf.py reuses the first reader it generated with an
 * identical equation, so the target name says nothing about the counter's
 * meaning -- confirm against the generator before "fixing" such an alias.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define ehl__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define ehl__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define ehl__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency
 * (alias for the counter's __max function, not its reader)
 */
#define ehl__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define ehl__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define ehl__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define ehl__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define ehl__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define ehl__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define ehl__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define ehl__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define ehl__hdc_and_sf__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define ehl__hdc_and_sf__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Both FPU Pipes Active */
#define ehl__hdc_and_sf__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set HDCAndSF :: VS FPU0 Pipe Active */
#define ehl__hdc_and_sf__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set HDCAndSF :: VS FPU1 Pipe Active */
#define ehl__hdc_and_sf__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set HDCAndSF :: VS Send Pipe Active */
#define ehl__hdc_and_sf__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set HDCAndSF :: PS FPU0 Pipe Active */
#define ehl__hdc_and_sf__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set HDCAndSF :: PS FPU1 Pipe Active */
#define ehl__hdc_and_sf__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set HDCAndSF :: PS Send Pipeline Active */
#define ehl__hdc_and_sf__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set HDCAndSF :: FS Both FPU Active */
#define ehl__hdc_and_sf__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define ehl__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define ehl__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define ehl__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define ehl__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define ehl__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define ehl__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define ehl__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define ehl__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define ehl__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define ehl__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define ehl__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define ehl__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define ehl__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define ehl__hdc_and_sf__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define ehl__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Slice0 Subslice group 0 Non-sampler Shader Access Stalled On L3 */
#define ehl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   icl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read

/* Metric set HDCAndSF :: Slice0 Subslice group 1 Non-sampler Shader Access Stalled On L3 */
#define ehl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   icl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read

/* Metric set HDCAndSF :: Slice0 Subslice group 2 Non-sampler Shader Access Stalled On L3 */
#define ehl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   icl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read

/* Metric set HDCAndSF :: Slice0 Subslice group 3 Non-sampler Shader Access Stalled On L3 */
#define ehl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read \
   icl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define ehl__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: SQ is full */
#define ehl__hdc_and_sf__gt_request_queue_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* EHL "RasterizerAndPixelBackend" metric set: every counter in this set is
 * an alias.
 *
 * Each macro makes an ehl__rasterizer_and_pixel_backend__* name expand to a
 * reader (or, for avg_gpu_core_frequency, also a __max function) that
 * carries a different prefix and/or metric-set name; no new code is emitted
 * here.
 *
 * NOTE(review): the set-specific counters at the end expand to bdw__ readers
 * named after unrelated counters (e.g. pixel_data00_ready ->
 * bdw ...sampler1_busy). Presumably gen_perf.py reuses the first reader it
 * generated with an identical equation, so the target name says nothing
 * about the counter's meaning -- confirm against the generator before
 * "fixing" such an alias.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define ehl__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define ehl__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define ehl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency
 * (alias for the counter's __max function, not its reader)
 */
#define ehl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define ehl__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define ehl__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define ehl__rasterizer_and_pixel_backend__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define ehl__rasterizer_and_pixel_backend__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Both FPU Pipes Active */
#define ehl__rasterizer_and_pixel_backend__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU0 Pipe Active */
#define ehl__rasterizer_and_pixel_backend__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: VS FPU1 Pipe Active */
#define ehl__rasterizer_and_pixel_backend__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: VS Send Pipe Active */
#define ehl__rasterizer_and_pixel_backend__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU0 Pipe Active */
#define ehl__rasterizer_and_pixel_backend__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set RasterizerAndPixelBackend :: PS FPU1 Pipe Active */
#define ehl__rasterizer_and_pixel_backend__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set RasterizerAndPixelBackend :: PS Send Pipeline Active */
#define ehl__rasterizer_and_pixel_backend__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set RasterizerAndPixelBackend :: FS Both FPU Active */
#define ehl__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define ehl__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define ehl__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define ehl__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define ehl__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define ehl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define ehl__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define ehl__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define ehl__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define ehl__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define ehl__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define ehl__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define ehl__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define ehl__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define ehl__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define ehl__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define ehl__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define ehl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define ehl__rasterizer_and_pixel_backend__pixel_data00_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Post-EarlyZ Pixel Data Ready */
#define ehl__rasterizer_and_pixel_backend__pixel_data01_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define ehl__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 PS Output Available */
#define ehl__rasterizer_and_pixel_backend__ps_output01_available__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define ehl__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Pixel Values Ready */
#define ehl__rasterizer_and_pixel_backend__pixel_values01_ready__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ is full */
#define ehl__rasterizer_and_pixel_backend__gt_request_queue_full__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* EHL "L3_1" metric set: every counter in this set is an alias.
 *
 * Each macro makes an ehl__l3_1__* name expand to a reader (or, for
 * avg_gpu_core_frequency, also a __max function) that carries a different
 * prefix and/or metric-set name; no new code is emitted here.
 *
 * NOTE(review): the per-bank "active" counters at the end expand to
 * bdw__render_pipe_profile__* stall/bottleneck readers. Presumably
 * gen_perf.py reuses the first reader it generated with an identical
 * equation, so the target name says nothing about the counter's meaning --
 * confirm against the generator before "fixing" such an alias.
 */

/* Metric set L3_1 :: GPU Time Elapsed */
#define ehl__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set L3_1 :: GPU Core Clocks */
#define ehl__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set L3_1 :: AVG GPU Core Frequency */
#define ehl__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set L3_1 :: AVG GPU Core Frequency
 * (alias for the counter's __max function, not its reader)
 */
#define ehl__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set L3_1 :: GPU Busy */
#define ehl__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set L3_1 :: VS Threads Dispatched */
#define ehl__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set L3_1 :: HS Threads Dispatched */
#define ehl__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set L3_1 :: DS Threads Dispatched */
#define ehl__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set L3_1 :: GS Threads Dispatched */
#define ehl__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set L3_1 :: FS Threads Dispatched */
#define ehl__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set L3_1 :: CS Threads Dispatched */
#define ehl__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set L3_1 :: EU Active */
#define ehl__l3_1__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set L3_1 :: EU Stall */
#define ehl__l3_1__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set L3_1 :: EU Both FPU Pipes Active */
#define ehl__l3_1__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set L3_1 :: VS FPU0 Pipe Active */
#define ehl__l3_1__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set L3_1 :: VS FPU1 Pipe Active */
#define ehl__l3_1__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set L3_1 :: VS Send Pipe Active */
#define ehl__l3_1__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set L3_1 :: PS FPU0 Pipe Active */
#define ehl__l3_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set L3_1 :: PS FPU1 Pipe Active */
#define ehl__l3_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set L3_1 :: PS Send Pipeline Active */
#define ehl__l3_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set L3_1 :: FS Both FPU Active */
#define ehl__l3_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set L3_1 :: Rasterized Pixels */
#define ehl__l3_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set L3_1 :: Early Hi-Depth Test Fails */
#define ehl__l3_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set L3_1 :: Early Depth Test Fails */
#define ehl__l3_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Samples Killed in FS */
#define ehl__l3_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set L3_1 :: Pixels Failing Tests */
#define ehl__l3_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set L3_1 :: Samples Written */
#define ehl__l3_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set L3_1 :: Samples Blended */
#define ehl__l3_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set L3_1 :: Sampler Texels */
#define ehl__l3_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set L3_1 :: Sampler Texels Misses */
#define ehl__l3_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set L3_1 :: SLM Bytes Read */
#define ehl__l3_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set L3_1 :: SLM Bytes Written */
#define ehl__l3_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set L3_1 :: Shader Memory Accesses */
#define ehl__l3_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set L3_1 :: Shader Atomic Memory Accesses */
#define ehl__l3_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set L3_1 :: L3 Shader Throughput */
#define ehl__l3_1__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set L3_1 :: Shader Barrier Messages */
#define ehl__l3_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set L3_1 :: Slice0 L3 Bank0 Active */
#define ehl__l3_1__l30_bank0_active__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank1 Active */
#define ehl__l3_1__l30_bank1_active__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank2 Active */
#define ehl__l3_1__l30_bank2_active__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank3 Active */
#define ehl__l3_1__l30_bank3_active__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank4 Active */
#define ehl__l3_1__l30_bank4_active__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank5 Active */
#define ehl__l3_1__l30_bank5_active__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set L3_1 :: Slice0 L3 Bank6 Active */
#define ehl__l3_1__l30_bank6_active__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set L3_1 :: Slice0 L3 Bank7 Active */
#define ehl__l3_1__l30_bank7_active__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set L3_1 :: SQ is full */
#define ehl__l3_1__gt_request_queue_full__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/*
 * L2Bank0 stalled metric set (symbol prefix ehl__l3_2__).
 *
 * Every macro below is a pure alias: the ehl__l3_2__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (gs_threads, for instance,
 * resolves to the hsw vs_threads reader).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__l3_2__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__l3_2__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__l3_2__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__l3_2__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__l3_2__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__l3_2__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* Rasterized Pixels */
#define ehl__l3_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__l3_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__l3_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__l3_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__l3_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__l3_2__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__l3_2__samples_blended__read bdw__render_basic__samples_blended__read

/* EU Active */
#define ehl__l3_2__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__l3_2__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__l3_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* EU Thread Occupancy */
#define ehl__l3_2__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VS FPU0 Pipe Active */
#define ehl__l3_2__vs_fpu0_active__read bdw__render_basic__vs_fpu1_active__read
/* VS FPU1 Pipe Active */
#define ehl__l3_2__vs_fpu1_active__read bdw__render_basic__vs_send_active__read
/* VS Send Pipe Active */
#define ehl__l3_2__vs_send_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read
/* PS FPU0 Pipe Active */
#define ehl__l3_2__ps_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read
/* PS FPU1 Pipe Active */
#define ehl__l3_2__ps_fpu1_active__read bdw__render_basic__ps_fpu0_active__read
/* PS Send Pipeline Active */
#define ehl__l3_2__ps_send_active__read bdw__render_basic__ps_fpu1_active__read
/* FS Both FPU Active */
#define ehl__l3_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_send_active__read

/* Slice0 L3 Bank0 Stalled */
#define ehl__l3_2__l30_bank0_stalled__read icl__l3_2__l30_bank0_stalled__read

/*
 * L2Bank1 stalled metric set (symbol prefix ehl__l3_3__).
 *
 * Every macro below is a pure alias: the ehl__l3_3__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (the Bank1 stalled counter, for
 * instance, resolves to the icl l3_2 bank0 reader).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__l3_3__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__l3_3__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__l3_3__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__l3_3__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__l3_3__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__l3_3__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* Rasterized Pixels */
#define ehl__l3_3__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__l3_3__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__l3_3__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__l3_3__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__l3_3__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__l3_3__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__l3_3__samples_blended__read bdw__render_basic__samples_blended__read

/* EU Active */
#define ehl__l3_3__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__l3_3__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__l3_3__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* EU Thread Occupancy */
#define ehl__l3_3__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VS FPU0 Pipe Active */
#define ehl__l3_3__vs_fpu0_active__read bdw__render_basic__vs_fpu1_active__read
/* VS FPU1 Pipe Active */
#define ehl__l3_3__vs_fpu1_active__read bdw__render_basic__vs_send_active__read
/* VS Send Pipe Active */
#define ehl__l3_3__vs_send_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read
/* PS FPU0 Pipe Active */
#define ehl__l3_3__ps_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read
/* PS FPU1 Pipe Active */
#define ehl__l3_3__ps_fpu1_active__read bdw__render_basic__ps_fpu0_active__read
/* PS Send Pipeline Active */
#define ehl__l3_3__ps_send_active__read bdw__render_basic__ps_fpu1_active__read
/* FS Both FPU Active */
#define ehl__l3_3__ps_eu_both_fpu_active__read bdw__render_basic__ps_send_active__read

/* Slice0 L3 Bank1 Stalled */
#define ehl__l3_3__l30_bank1_stalled__read icl__l3_2__l30_bank0_stalled__read

/*
 * L2Bank4 stalled metric set (symbol prefix ehl__l3_4__).
 *
 * Every macro below is a pure alias: the ehl__l3_4__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (the Bank4 stalled counter, for
 * instance, resolves to the icl l3_2 bank0 reader).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__l3_4__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__l3_4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__l3_4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__l3_4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__l3_4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__l3_4__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__l3_4__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__l3_4__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__l3_4__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__l3_4__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__l3_4__cs_threads__read bdw__render_basic__cs_threads__read

/* Rasterized Pixels */
#define ehl__l3_4__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__l3_4__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__l3_4__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__l3_4__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__l3_4__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__l3_4__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__l3_4__samples_blended__read bdw__render_basic__samples_blended__read

/* EU Active */
#define ehl__l3_4__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__l3_4__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__l3_4__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* EU Thread Occupancy */
#define ehl__l3_4__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VS FPU0 Pipe Active */
#define ehl__l3_4__vs_fpu0_active__read bdw__render_basic__vs_fpu1_active__read
/* VS FPU1 Pipe Active */
#define ehl__l3_4__vs_fpu1_active__read bdw__render_basic__vs_send_active__read
/* VS Send Pipe Active */
#define ehl__l3_4__vs_send_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read
/* PS FPU0 Pipe Active */
#define ehl__l3_4__ps_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read
/* PS FPU1 Pipe Active */
#define ehl__l3_4__ps_fpu1_active__read bdw__render_basic__ps_fpu0_active__read
/* PS Send Pipeline Active */
#define ehl__l3_4__ps_send_active__read bdw__render_basic__ps_fpu1_active__read
/* FS Both FPU Active */
#define ehl__l3_4__ps_eu_both_fpu_active__read bdw__render_basic__ps_send_active__read

/* Slice0 L3 Bank4 Stalled */
#define ehl__l3_4__l30_bank4_stalled__read icl__l3_2__l30_bank0_stalled__read

/*
 * L2Bank5 stalled metric set (symbol prefix ehl__l3_5__).
 *
 * Every macro below is a pure alias: the ehl__l3_5__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (gs_threads, for instance,
 * resolves to the hsw vs_threads reader).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__l3_5__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__l3_5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__l3_5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__l3_5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__l3_5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__l3_5__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__l3_5__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__l3_5__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__l3_5__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__l3_5__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__l3_5__cs_threads__read bdw__render_basic__cs_threads__read

/* Rasterized Pixels */
#define ehl__l3_5__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__l3_5__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__l3_5__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__l3_5__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__l3_5__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__l3_5__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__l3_5__samples_blended__read bdw__render_basic__samples_blended__read

/* EU Active */
#define ehl__l3_5__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__l3_5__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__l3_5__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read
/* EU Thread Occupancy */
#define ehl__l3_5__eu_thread_occupancy__read bdw__vme_pipe__eu_thread_occupancy__read

/* VS FPU0 Pipe Active */
#define ehl__l3_5__vs_fpu0_active__read bdw__render_basic__vs_fpu1_active__read
/* VS FPU1 Pipe Active */
#define ehl__l3_5__vs_fpu1_active__read bdw__render_basic__vs_send_active__read
/* VS Send Pipe Active */
#define ehl__l3_5__vs_send_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read
/* PS FPU0 Pipe Active */
#define ehl__l3_5__ps_fpu0_active__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read
/* PS FPU1 Pipe Active */
#define ehl__l3_5__ps_fpu1_active__read bdw__render_basic__ps_fpu0_active__read
/* PS Send Pipeline Active */
#define ehl__l3_5__ps_send_active__read bdw__render_basic__ps_fpu1_active__read
/* FS Both FPU Active */
#define ehl__l3_5__ps_eu_both_fpu_active__read bdw__render_basic__ps_send_active__read

/* Slice0 L3 Bank5 Stalled */
#define ehl__l3_5__l30_bank5_stalled__read icl__l3_5__l30_bank5_stalled__read

/*
 * Metric set Sampler 1 (symbol prefix ehl__sampler_1__).
 *
 * Every macro below is a pure alias: the ehl__sampler_1__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (the per-subslice Input Available
 * counters, for instance, resolve to bdw bottleneck/busy readers).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__sampler_1__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__sampler_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__sampler_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__sampler_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__sampler_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__sampler_1__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__sampler_1__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__sampler_1__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__sampler_1__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__sampler_1__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__sampler_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define ehl__sampler_1__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__sampler_1__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__sampler_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define ehl__sampler_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read
/* VS FPU1 Pipe Active */
#define ehl__sampler_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read
/* VS Send Pipe Active */
#define ehl__sampler_1__vs_send_active__read bdw__render_basic__vs_send_active__read
/* PS FPU0 Pipe Active */
#define ehl__sampler_1__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read
/* PS FPU1 Pipe Active */
#define ehl__sampler_1__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read
/* PS Send Pipeline Active */
#define ehl__sampler_1__ps_send_active__read bdw__render_basic__ps_send_active__read
/* FS Both FPU Active */
#define ehl__sampler_1__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define ehl__sampler_1__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__sampler_1__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__sampler_1__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__sampler_1__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__sampler_1__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__sampler_1__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__sampler_1__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define ehl__sampler_1__sampler_texels__read bdw__render_basic__sampler_texels__read
/* Sampler Texels Misses */
#define ehl__sampler_1__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read
/* SLM Bytes Read */
#define ehl__sampler_1__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read
/* SLM Bytes Written */
#define ehl__sampler_1__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define ehl__sampler_1__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read
/* Shader Atomic Memory Accesses */
#define ehl__sampler_1__shader_atomics__read bdw__render_basic__shader_atomics__read
/* L3 Shader Throughput */
#define ehl__sampler_1__l3_shader_throughput__read ehl__render_basic__l3_shader_throughput__read
/* Shader Barrier Messages */
#define ehl__sampler_1__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Subslice0 Input Available */
#define ehl__sampler_1__sampler00_input_available__read bdw__render_pipe_profile__early_depth_bottleneck__read
/* Slice0 Subslice1 Input Available */
#define ehl__sampler_1__sampler01_input_available__read bdw__render_pipe_profile__sf_bottleneck__read
/* Slice0 Subslice2 Input Available */
#define ehl__sampler_1__sampler02_input_available__read bdw__render_pipe_profile__cl_bottleneck__read
/* Slice0 Subslice3 Input Available */
#define ehl__sampler_1__sampler03_input_available__read bdw__render_pipe_profile__so_bottleneck__read
/* Slice0 Subslice4 Input Available */
#define ehl__sampler_1__sampler04_input_available__read bdw__render_basic__sampler1_bottleneck__read
/* Slice0 Subslice5 Input Available */
#define ehl__sampler_1__sampler05_input_available__read bdw__render_basic__sampler0_bottleneck__read
/* Slice0 Subslice6 Input Available */
#define ehl__sampler_1__sampler06_input_available__read bdw__render_basic__sampler1_busy__read
/* Slice0 Subslice7 Input Available */
#define ehl__sampler_1__sampler07_input_available__read bdw__render_basic__sampler0_busy__read

/*
 * Metric set Sampler 2 (symbol prefix ehl__sampler_2__).
 *
 * Every macro below is a pure alias: the ehl__sampler_2__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (the per-subslice Sampler Output
 * Ready counters, for instance, resolve to bdw render_pipe_profile readers).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__sampler_2__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__sampler_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__sampler_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__sampler_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__sampler_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__sampler_2__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__sampler_2__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__sampler_2__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__sampler_2__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__sampler_2__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__sampler_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define ehl__sampler_2__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__sampler_2__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__sampler_2__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define ehl__sampler_2__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read
/* VS FPU1 Pipe Active */
#define ehl__sampler_2__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read
/* VS Send Pipe Active */
#define ehl__sampler_2__vs_send_active__read bdw__render_basic__vs_send_active__read
/* PS FPU0 Pipe Active */
#define ehl__sampler_2__ps_fpu0_active__read bdw__render_basic__ps_fpu0_active__read
/* PS FPU1 Pipe Active */
#define ehl__sampler_2__ps_fpu1_active__read bdw__render_basic__ps_fpu1_active__read
/* PS Send Pipeline Active */
#define ehl__sampler_2__ps_send_active__read bdw__render_basic__ps_send_active__read
/* FS Both FPU Active */
#define ehl__sampler_2__ps_eu_both_fpu_active__read bdw__render_basic__ps_eu_both_fpu_active__read

/* Rasterized Pixels */
#define ehl__sampler_2__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read
/* Early Hi-Depth Test Fails */
#define ehl__sampler_2__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read
/* Early Depth Test Fails */
#define ehl__sampler_2__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read
/* Samples Killed in FS */
#define ehl__sampler_2__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read
/* Pixels Failing Tests */
#define ehl__sampler_2__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read
/* Samples Written */
#define ehl__sampler_2__samples_written__read bdw__render_basic__samples_written__read
/* Samples Blended */
#define ehl__sampler_2__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define ehl__sampler_2__sampler_texels__read bdw__render_basic__sampler_texels__read
/* Sampler Texels Misses */
#define ehl__sampler_2__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read
/* SLM Bytes Read */
#define ehl__sampler_2__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read
/* SLM Bytes Written */
#define ehl__sampler_2__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define ehl__sampler_2__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read
/* Shader Atomic Memory Accesses */
#define ehl__sampler_2__shader_atomics__read bdw__render_basic__shader_atomics__read
/* L3 Shader Throughput */
#define ehl__sampler_2__l3_shader_throughput__read ehl__render_basic__l3_shader_throughput__read
/* Shader Barrier Messages */
#define ehl__sampler_2__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Subslice0 Sampler Output Ready */
#define ehl__sampler_2__sampler00_output_ready__read bdw__render_pipe_profile__vf_bottleneck__read
/* Slice0 Subslice1 Sampler Output Ready */
#define ehl__sampler_2__sampler01_output_ready__read bdw__render_pipe_profile__hs_stall__read
/* Slice0 Subslice2 Sampler Output Ready */
#define ehl__sampler_2__sampler02_output_ready__read bdw__render_pipe_profile__ds_stall__read
/* Slice0 Subslice3 Sampler Output Ready */
#define ehl__sampler_2__sampler03_output_ready__read bdw__render_pipe_profile__so_stall__read
/* Slice0 Subslice4 Sampler Output Ready */
#define ehl__sampler_2__sampler04_output_ready__read bdw__render_pipe_profile__cl_stall__read
/* Slice0 Subslice5 Sampler Output Ready */
#define ehl__sampler_2__sampler05_output_ready__read bdw__render_pipe_profile__sf_stall__read
/* Slice0 Subslice6 Sampler Output Ready */
#define ehl__sampler_2__sampler06_output_ready__read bdw__render_pipe_profile__hi_depth_bottleneck__read
/* Slice0 Subslice7 Sampler Output Ready */
#define ehl__sampler_2__sampler07_output_ready__read bdw__render_pipe_profile__bc_bottleneck__read

/* SQ is full */
#define ehl__sampler_2__gt_request_queue_full__read bdw__render_pipe_profile__early_depth_bottleneck__read

/*
 * Metric set TDL_1 (symbol prefix ehl__tdl_1__): aliases for the counters
 * from GPU Time Elapsed through VS AVG Stall per Thread.
 *
 * Every macro below is a pure alias: the ehl__tdl_1__ name on the left
 * expands to the reader symbol on the right. The target's name does not
 * always match the counter being aliased (vs_eu_active, for instance,
 * resolves to a bdw compute_l3_cache reader).
 * NOTE(review): presumably the generator shares one reader between counters
 * whose equations are identical -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define ehl__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define ehl__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency */
#define ehl__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
/* AVG GPU Core Frequency (the __max companion symbol) */
#define ehl__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define ehl__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define ehl__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read
/* HS Threads Dispatched */
#define ehl__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read
/* DS Threads Dispatched */
#define ehl__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read
/* GS Threads Dispatched */
#define ehl__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read
/* FS Threads Dispatched */
#define ehl__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read
/* CS Threads Dispatched */
#define ehl__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define ehl__tdl_1__eu_active__read bdw__render_basic__eu_active__read
/* EU Stall */
#define ehl__tdl_1__eu_stall__read bdw__render_basic__eu_stall__read
/* EU Both FPU Pipes Active */
#define ehl__tdl_1__eu_fpu_both_active__read bdw__render_basic__eu_fpu_both_active__read

/* VS FPU0 Pipe Active */
#define ehl__tdl_1__vs_fpu0_active__read bdw__render_basic__vs_fpu0_active__read
/* VS FPU1 Pipe Active */
#define ehl__tdl_1__vs_fpu1_active__read bdw__render_basic__vs_fpu1_active__read
/* VS Send Pipe Active */
#define ehl__tdl_1__vs_send_active__read bdw__render_basic__vs_send_active__read

/* VS EU Active */
#define ehl__tdl_1__vs_eu_active__read bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read
/* VS AVG Active per Thread */
#define ehl__tdl_1__vs_eu_active_per_thread__read icl__tdl_1__vs_eu_active_per_thread__read
/* VS EU Stall */
#define ehl__tdl_1__vs_eu_stall__read bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read
/* VS AVG Stall per Thread */
#define ehl__tdl_1__vs_eu_stall_per_thread__read icl__tdl_1__vs_eu_stall_per_thread__read

/* Metric set TDL_1 :: PS FPU0 Pipe Active */
#define ehl__tdl_1__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_1 :: PS FPU1 Pipe Active */
#define ehl__tdl_1__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_1 :: PS Send Pipeline Active */
#define ehl__tdl_1__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_1 :: FS Both FPU Active */
#define ehl__tdl_1__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_1 :: FS EU Active */
#define ehl__tdl_1__ps_eu_active__read \
   bdw__compute_l3_cache__eu_move_fpu0_instruction__read

/* Metric set TDL_1 :: FS AVG Active per Thread */
#define ehl__tdl_1__ps_eu_active_per_thread__read \
   icl__tdl_1__ps_eu_active_per_thread__read

/* Metric set TDL_1 :: FS EU Stall */
#define ehl__tdl_1__ps_eu_stall__read \
   bdw__compute_l3_cache__eu_move_fpu1_instruction__read

/* Metric set TDL_1 :: FS AVG Stall per Thread */
#define ehl__tdl_1__ps_eu_stall_per_thread__read \
   icl__tdl_1__ps_eu_stall_per_thread__read

/* Metric set TDL_1 :: Rasterized Pixels */
#define ehl__tdl_1__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_1 :: Early Hi-Depth Test Fails */
#define ehl__tdl_1__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_1 :: Early Depth Test Fails */
#define ehl__tdl_1__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: Samples Killed in FS */
#define ehl__tdl_1__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_1 :: Pixels Failing Tests */
#define ehl__tdl_1__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_1 :: Samples Written */
#define ehl__tdl_1__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_1 :: Samples Blended */
#define ehl__tdl_1__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_1 :: Sampler Texels */
#define ehl__tdl_1__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_1 :: Sampler Texels Misses */
#define ehl__tdl_1__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_1 :: SLM Bytes Read */
#define ehl__tdl_1__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_1 :: SLM Bytes Written */
#define ehl__tdl_1__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_1 :: Shader Memory Accesses */
#define ehl__tdl_1__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_1 :: Shader Atomic Memory Accesses */
#define ehl__tdl_1__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_1 :: L3 Shader Throughput */
#define ehl__tdl_1__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set TDL_1 :: Shader Barrier Messages */
#define ehl__tdl_1__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher */
#define ehl__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher */
#define ehl__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher */
#define ehl__tdl_1__ps_thread02_ready_for_dispatch__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher */
#define ehl__tdl_1__ps_thread03_ready_for_dispatch__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher */
#define ehl__tdl_1__ps_thread04_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher */
#define ehl__tdl_1__ps_thread05_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher */
#define ehl__tdl_1__ps_thread06_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_1 :: PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher */
#define ehl__tdl_1__ps_thread07_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* EHL "TDL_2" reader aliases.
 *
 * Every ehl__tdl_2__* name below is an object-like macro that expands to a
 * reader defined elsewhere in this file (hsw__, bdw__, icl__tdl_2__ or
 * ehl__render_basic__ names).  The non-PS "ready for dispatch" metrics
 * expand to the icl__tdl_2__* readers of the same name.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation, which is why some targets have unrelated names;
 * confirm against the generator.
 */

/* Metric set TDL_2 :: GPU Time Elapsed */
#define ehl__tdl_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_2 :: GPU Core Clocks */
#define ehl__tdl_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_2 :: AVG GPU Core Frequency */
#define ehl__tdl_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_2 :: AVG GPU Core Frequency (__max variant) */
#define ehl__tdl_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_2 :: GPU Busy */
#define ehl__tdl_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_2 :: VS Threads Dispatched */
#define ehl__tdl_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_2 :: HS Threads Dispatched */
#define ehl__tdl_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_2 :: DS Threads Dispatched */
#define ehl__tdl_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_2 :: GS Threads Dispatched */
#define ehl__tdl_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_2 :: FS Threads Dispatched */
#define ehl__tdl_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_2 :: CS Threads Dispatched */
#define ehl__tdl_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_2 :: EU Active */
#define ehl__tdl_2__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_2 :: EU Stall */
#define ehl__tdl_2__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_2 :: EU Both FPU Pipes Active */
#define ehl__tdl_2__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_2 :: VS FPU0 Pipe Active */
#define ehl__tdl_2__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_2 :: VS FPU1 Pipe Active */
#define ehl__tdl_2__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_2 :: VS Send Pipe Active */
#define ehl__tdl_2__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_2 :: PS FPU0 Pipe Active */
#define ehl__tdl_2__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_2 :: PS FPU1 Pipe Active */
#define ehl__tdl_2__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_2 :: PS Send Pipeline Active */
#define ehl__tdl_2__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_2 :: FS Both FPU Active */
#define ehl__tdl_2__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_2 :: Rasterized Pixels */
#define ehl__tdl_2__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_2 :: Early Hi-Depth Test Fails */
#define ehl__tdl_2__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_2 :: Early Depth Test Fails */
#define ehl__tdl_2__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Samples Killed in FS */
#define ehl__tdl_2__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_2 :: Pixels Failing Tests */
#define ehl__tdl_2__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_2 :: Samples Written */
#define ehl__tdl_2__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_2 :: Samples Blended */
#define ehl__tdl_2__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_2 :: Sampler Texels */
#define ehl__tdl_2__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_2 :: Sampler Texels Misses */
#define ehl__tdl_2__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_2 :: SLM Bytes Read */
#define ehl__tdl_2__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_2 :: SLM Bytes Written */
#define ehl__tdl_2__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_2 :: Shader Memory Accesses */
#define ehl__tdl_2__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_2 :: Shader Atomic Memory Accesses */
#define ehl__tdl_2__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_2 :: L3 Shader Throughput */
#define ehl__tdl_2__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set TDL_2 :: Shader Barrier Messages */
#define ehl__tdl_2__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread00_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread00_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread01_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread01_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread02_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread02_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread03_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread03_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread04_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread04_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread05_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread05_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread06_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread06_ready_for_dispatch__read

/* Metric set TDL_2 :: Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher */
#define ehl__tdl_2__non_ps_thread07_ready_for_dispatch__read \
   icl__tdl_2__non_ps_thread07_ready_for_dispatch__read

/* Metric set TDL_2 :: SQ is full */
#define ehl__tdl_2__gt_request_queue_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* EHL "TDL_3" reader aliases.
 *
 * Every ehl__tdl_3__* name below is an object-like macro that expands to a
 * reader defined elsewhere in this file (hsw__, bdw__ or
 * ehl__render_basic__ names).  The sixteen "Thread Header Ready" metrics
 * expand to bdw__render_basic__sampler* and bdw__render_pipe_profile__*
 * readers.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation, which is why the targets have unrelated names;
 * confirm against the generator.
 */

/* Metric set TDL_3 :: GPU Time Elapsed */
#define ehl__tdl_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TDL_3 :: GPU Core Clocks */
#define ehl__tdl_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TDL_3 :: AVG GPU Core Frequency */
#define ehl__tdl_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TDL_3 :: AVG GPU Core Frequency (__max variant) */
#define ehl__tdl_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TDL_3 :: GPU Busy */
#define ehl__tdl_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set TDL_3 :: VS Threads Dispatched */
#define ehl__tdl_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set TDL_3 :: HS Threads Dispatched */
#define ehl__tdl_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set TDL_3 :: DS Threads Dispatched */
#define ehl__tdl_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set TDL_3 :: GS Threads Dispatched */
#define ehl__tdl_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set TDL_3 :: FS Threads Dispatched */
#define ehl__tdl_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set TDL_3 :: CS Threads Dispatched */
#define ehl__tdl_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set TDL_3 :: EU Active */
#define ehl__tdl_3__eu_active__read \
   bdw__render_basic__eu_active__read

/* Metric set TDL_3 :: EU Stall */
#define ehl__tdl_3__eu_stall__read \
   bdw__render_basic__eu_stall__read

/* Metric set TDL_3 :: EU Both FPU Pipes Active */
#define ehl__tdl_3__eu_fpu_both_active__read \
   bdw__render_basic__eu_fpu_both_active__read

/* Metric set TDL_3 :: VS FPU0 Pipe Active */
#define ehl__tdl_3__vs_fpu0_active__read \
   bdw__render_basic__vs_fpu0_active__read

/* Metric set TDL_3 :: VS FPU1 Pipe Active */
#define ehl__tdl_3__vs_fpu1_active__read \
   bdw__render_basic__vs_fpu1_active__read

/* Metric set TDL_3 :: VS Send Pipe Active */
#define ehl__tdl_3__vs_send_active__read \
   bdw__render_basic__vs_send_active__read

/* Metric set TDL_3 :: PS FPU0 Pipe Active */
#define ehl__tdl_3__ps_fpu0_active__read \
   bdw__render_basic__ps_fpu0_active__read

/* Metric set TDL_3 :: PS FPU1 Pipe Active */
#define ehl__tdl_3__ps_fpu1_active__read \
   bdw__render_basic__ps_fpu1_active__read

/* Metric set TDL_3 :: PS Send Pipeline Active */
#define ehl__tdl_3__ps_send_active__read \
   bdw__render_basic__ps_send_active__read

/* Metric set TDL_3 :: FS Both FPU Active */
#define ehl__tdl_3__ps_eu_both_fpu_active__read \
   bdw__render_basic__ps_eu_both_fpu_active__read

/* Metric set TDL_3 :: Rasterized Pixels */
#define ehl__tdl_3__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set TDL_3 :: Early Hi-Depth Test Fails */
#define ehl__tdl_3__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set TDL_3 :: Early Depth Test Fails */
#define ehl__tdl_3__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set TDL_3 :: Samples Killed in FS */
#define ehl__tdl_3__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set TDL_3 :: Pixels Failing Tests */
#define ehl__tdl_3__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set TDL_3 :: Samples Written */
#define ehl__tdl_3__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set TDL_3 :: Samples Blended */
#define ehl__tdl_3__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set TDL_3 :: Sampler Texels */
#define ehl__tdl_3__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set TDL_3 :: Sampler Texels Misses */
#define ehl__tdl_3__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set TDL_3 :: SLM Bytes Read */
#define ehl__tdl_3__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set TDL_3 :: SLM Bytes Written */
#define ehl__tdl_3__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set TDL_3 :: Shader Memory Accesses */
#define ehl__tdl_3__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set TDL_3 :: Shader Atomic Memory Accesses */
#define ehl__tdl_3__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set TDL_3 :: L3 Shader Throughput */
#define ehl__tdl_3__l3_shader_throughput__read \
   ehl__render_basic__l3_shader_throughput__read

/* Metric set TDL_3 :: Shader Barrier Messages */
#define ehl__tdl_3__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header00_ready_port0__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header00_ready_port1__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header01_ready_port0__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header01_ready_port1__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header02_ready_port0__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header02_ready_port1__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header03_ready_port0__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header03_ready_port1__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header04_ready_port0__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header04_ready_port1__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header05_ready_port0__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header05_ready_port1__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header06_ready_port0__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header06_ready_port1__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0 */
#define ehl__tdl_3__thread_header07_ready_port0__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set TDL_3 :: Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1 */
#define ehl__tdl_3__thread_header07_ready_port1__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* EHL "Gpu Rings Busyness" reader aliases.
 *
 * Every ehl__gpu_busyness__* name below is an object-like macro that
 * expands to a reader defined elsewhere in this file under an hsw__ or
 * bdw__ name.  The per-ring busy metrics expand to
 * bdw__render_pipe_profile__* / bdw__render_basic__sampler0_busy readers.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation, which is why the targets have unrelated names;
 * confirm against the generator.
 */

/* Gpu Rings Busyness :: GPU Time Elapsed */
#define ehl__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Gpu Rings Busyness :: GPU Core Clocks */
#define ehl__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency */
#define ehl__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Gpu Rings Busyness :: AVG GPU Core Frequency (__max variant) */
#define ehl__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Gpu Rings Busyness :: GPU Busy */
#define ehl__gpu_busyness__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Gpu Rings Busyness :: Render Ring Busy */
#define ehl__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Gpu Rings Busyness :: Vdbox0 Ring Busy */
#define ehl__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Gpu Rings Busyness :: Vdbox1 Ring Busy */
#define ehl__gpu_busyness__vdbox1_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* Gpu Rings Busyness :: Vebox Ring Busy */
#define ehl__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* Gpu Rings Busyness :: Blitter Ring Busy */
#define ehl__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__so_stall__read

/* Gpu Rings Busyness :: AnyRingBusy */
#define ehl__gpu_busyness__any_ring_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EHL "TestOa" reader aliases.
 *
 * Every ehl__test_oa__* name below is an object-like macro that expands to
 * a reader defined elsewhere in this file.  TestCounter0..8 expand to
 * hsw__compute_extended__* readers.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation, which is why the targets have unrelated names;
 * confirm against the generator.
 */

/* Metric set TestOa :: GPU Time Elapsed */
#define ehl__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TestOa :: GPU Core Clocks */
#define ehl__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TestOa :: AVG GPU Core Frequency */
#define ehl__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TestOa :: AVG GPU Core Frequency (__max variant) */
#define ehl__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TestOa :: TestCounter0 */
#define ehl__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Metric set TestOa :: TestCounter1 */
#define ehl__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Metric set TestOa :: TestCounter2 */
#define ehl__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Metric set TestOa :: TestCounter3 */
#define ehl__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Metric set TestOa :: TestCounter4 */
#define ehl__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Metric set TestOa :: TestCounter5 */
#define ehl__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Metric set TestOa :: TestCounter6 */
#define ehl__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Metric set TestOa :: TestCounter7 */
#define ehl__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* Metric set TestOa :: TestCounter8 */
#define ehl__test_oa__counter8__read \
   hsw__compute_extended__typed_writes0__read

/* tglgt1 "Render Metrics Basic" reader aliases (first run).
 *
 * Each tglgt1__render_basic__* name below is an object-like macro that
 * expands to an hsw__render_basic__* or bdw__render_basic__* reader defined
 * elsewhere in this file.  The gpu_core_clocks alias is what the
 * tglgt1__render_basic__eu_*__read functions further down call.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation (gs_threads expands to an hsw vs_threads reader);
 * confirm against the generator.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define tglgt1__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define tglgt1__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define tglgt1__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define tglgt1__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define tglgt1__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define tglgt1__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define tglgt1__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define tglgt1__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define tglgt1__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define tglgt1__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active
 *
 * Adds up A counters 7..10, integer-divides the truncated total by the EU
 * count (perf->sys_vars.n_eus), multiplies by 100 and finally divides by
 * the value of the GPU core clocks reader.  Either division yields 0 when
 * its divisor is zero.
 */
static float
tglgt1__render_basic__eu_active__read(UNUSED struct intel_perf_config *perf,
                                      const struct intel_perf_query_info *query,
                                      const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ A 8 READ FADD A 9 READ FADD A 10 READ FADD $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a7 = results->accumulator[query->a_offset + 7];
   const uint64_t a8 = results->accumulator[query->a_offset + 8];
   const uint64_t a9 = results->accumulator[query->a_offset + 9];
   const uint64_t a10 = results->accumulator[query->a_offset + 10];

   /* The first pair is added as 64-bit integers; the remaining terms are
    * accumulated in double precision.
    */
   double active_sum = a7 + a8;
   active_sum += a9;
   active_sum += a10;

   /* UDIV and UMUL work on the sum truncated back to an integer. */
   const uint64_t active_total = (uint64_t) active_sum;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;
   if (eu_count)
      per_eu = active_total / eu_count;
   const double scaled = per_eu * 100;

   const double core_clocks =
      tglgt1__render_basic__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;
   if (core_clocks)
      ratio = scaled / core_clocks;

   return ratio;
}

/* Render Metrics Basic set :: EU Stall
 *
 * Adds up A counters 11..14, integer-divides the truncated total by the EU
 * count (perf->sys_vars.n_eus), multiplies by 100 and finally divides by
 * the value of the GPU core clocks reader.  Either division yields 0 when
 * its divisor is zero.
 */
static float
tglgt1__render_basic__eu_stall__read(UNUSED struct intel_perf_config *perf,
                                     const struct intel_perf_query_info *query,
                                     const struct intel_perf_query_result *results)
{
   /* RPN equation: A 11 READ A 12 READ FADD A 13 READ FADD A 14 READ FADD $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a11 = results->accumulator[query->a_offset + 11];
   const uint64_t a12 = results->accumulator[query->a_offset + 12];
   const uint64_t a13 = results->accumulator[query->a_offset + 13];
   const uint64_t a14 = results->accumulator[query->a_offset + 14];

   /* The first pair is added as 64-bit integers; the remaining terms are
    * accumulated in double precision.
    */
   double stall_sum = a11 + a12;
   stall_sum += a13;
   stall_sum += a14;

   /* UDIV and UMUL work on the sum truncated back to an integer. */
   const uint64_t stall_total = (uint64_t) stall_sum;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;
   if (eu_count)
      per_eu = stall_total / eu_count;
   const double scaled = per_eu * 100;

   const double core_clocks =
      tglgt1__render_basic__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;
   if (core_clocks)
      ratio = scaled / core_clocks;

   return ratio;
}

/* Render Metrics Basic set :: EU Thread Occupancy
 *
 * Adds up A counters 15..18, multiplies by 8 and divides (floating point)
 * by perf->sys_vars.eu_threads_count.  The truncated result is then
 * integer-divided by the EU count (perf->sys_vars.n_eus), multiplied by 100
 * and divided by the value of the GPU core clocks reader.  Each division
 * yields 0 when its divisor is zero.
 */
static float
tglgt1__render_basic__eu_thread_occupancy__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: 8 A 15 READ A 16 READ FADD A 17 READ FADD A 18 READ FADD FMUL $EuThreadsCount FDIV $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a15 = results->accumulator[query->a_offset + 15];
   const uint64_t a16 = results->accumulator[query->a_offset + 16];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];

   /* The first pair is added as 64-bit integers; the remaining terms are
    * accumulated in double precision.
    */
   double occupancy_sum = a15 + a16;
   occupancy_sum += a17;
   occupancy_sum += a18;
   const double weighted = 8 * occupancy_sum;

   /* FDIV by the thread count stays in floating point. */
   const double thread_count = perf->sys_vars.eu_threads_count;
   double per_thread = 0;
   if (thread_count)
      per_thread = weighted / thread_count;

   /* UDIV and UMUL work on the value truncated back to an integer. */
   const uint64_t per_thread_total = (uint64_t) per_thread;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;
   if (eu_count)
      per_eu = per_thread_total / eu_count;
   const double scaled = per_eu * 100;

   const double core_clocks =
      tglgt1__render_basic__gpu_core_clocks__read(perf, query, results);
   double ratio = 0;
   if (core_clocks)
      ratio = scaled / core_clocks;

   return ratio;
}

/* tglgt1 "Render Metrics Basic" reader aliases (second run).
 *
 * Each tglgt1__render_basic__* name below is an object-like macro that
 * expands to a reader defined elsewhere in this file (hsw__, bdw__ or
 * icl__render_basic__ names).  Note that two pairs expand to the same
 * target: sampler00_busy and samplers_busy both expand to
 * bdw__render_basic__sampler0_busy__read, and sampler00_bottleneck and
 * sampler_bottleneck both expand to bdw__render_basic__sampler1_busy__read.
 *
 * NOTE(review): presumably gen_perf.py reuses an earlier reader with an
 * identical equation, which is why some targets have unrelated names;
 * confirm against the generator.
 */

/* Render Metrics Basic set :: Sampler00 Busy */
#define tglgt1__render_basic__sampler00_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler Slice0 Dualsubslice0 is bottleneck */
#define tglgt1__render_basic__sampler00_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define tglgt1__render_basic__samplers_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define tglgt1__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define tglgt1__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define tglgt1__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define tglgt1__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define tglgt1__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define tglgt1__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define tglgt1__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define tglgt1__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define tglgt1__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define tglgt1__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define tglgt1__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define tglgt1__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define tglgt1__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define tglgt1__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define tglgt1__render_basic__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define tglgt1__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Read Throughput
 *
 * Adds the four C counters at indices 5, 4, 3 and 2 (relative to
 * query->c_offset) and multiplies the sum by 64. All arithmetic is unsigned
 * 64-bit. `perf` is not used.
 * NOTE(review): the factor 64 is presumably a bytes-per-access scale --
 * confirm against the metric description.
 */
static uint64_t
tglgt1__render_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                                const struct intel_perf_query_info *query,
                                                const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  C 5 READ C 4 READ UADD C 3 READ UADD C 2 READ UADD UMUL */
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];

   /* Same left-to-right accumulation order as the equation. */
   const uint64_t total = ((c5 + c4) + c3) + c2;

   return 64 * total;
}

/* Render Metrics Basic set :: GTI Write Throughput
 *
 * Adds the two C counters at indices 1 and 0 (relative to query->c_offset)
 * and multiplies the sum by 64. All arithmetic is unsigned 64-bit. `perf` is
 * not used.
 */
static uint64_t
tglgt1__render_basic__gti_write_throughput__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  C 1 READ C 0 READ UADD UMUL */
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c0 = results->accumulator[query->c_offset + 0];
   const uint64_t total = c1 + c0;

   return 64 * total;
}

/*
 * tglgt1 Compute Metrics Basic set :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name does not always match the counter being aliased (for
 * example gs_threads maps to hsw__render_basic__vs_threads__read and
 * shader_barriers maps to hsw__render_basic__early_depth_test_fails__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define tglgt1__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define tglgt1__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define tglgt1__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define tglgt1__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define tglgt1__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define tglgt1__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define tglgt1__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define tglgt1__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define tglgt1__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define tglgt1__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define tglgt1__compute_basic__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define tglgt1__compute_basic__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define tglgt1__compute_basic__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define tglgt1__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define tglgt1__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define tglgt1__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define tglgt1__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define tglgt1__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define tglgt1__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define tglgt1__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define tglgt1__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define tglgt1__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define tglgt1__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define tglgt1__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define tglgt1__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define tglgt1__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define tglgt1__compute_basic__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define tglgt1__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: GTI Read Throughput
 *
 * Adds the four C counters at indices 7, 6, 5 and 4 (relative to
 * query->c_offset) and multiplies the sum by 64. All arithmetic is unsigned
 * 64-bit. `perf` is not used.
 */
static uint64_t
tglgt1__compute_basic__gti_read_throughput__read(UNUSED struct intel_perf_config *perf,
                                                 const struct intel_perf_query_info *query,
                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  C 7 READ C 6 READ UADD C 5 READ UADD C 4 READ UADD UMUL */
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   /* Same left-to-right accumulation order as the equation. */
   const uint64_t total = ((c7 + c6) + c5) + c4;

   return 64 * total;
}

/* Compute Metrics Basic set :: GTI Write Throughput
 *
 * Adds the two C counters at indices 3 and 2 (relative to query->c_offset)
 * and multiplies the sum by 64. All arithmetic is unsigned 64-bit. `perf` is
 * not used.
 */
static uint64_t
tglgt1__compute_basic__gti_write_throughput__read(UNUSED struct intel_perf_config *perf,
                                                  const struct intel_perf_query_info *query,
                                                  const struct intel_perf_query_result *results)
{
   /* RPN equation: 64  C 3 READ C 2 READ UADD UMUL */
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t total = c3 + c2;

   return 64 * total;
}

/*
 * tglgt1 Render Metrics set for 3D Pipeline Profile :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name frequently does not match the counter being aliased (for
 * example vf_bottleneck maps to bdw__render_pipe_profile__bc_bottleneck__read
 * and cl_stall maps to bdw__render_basic__sampler1_busy__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define tglgt1__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define tglgt1__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define tglgt1__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define tglgt1__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define tglgt1__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define tglgt1__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define tglgt1__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define tglgt1__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define tglgt1__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define tglgt1__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define tglgt1__render_pipe_profile__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define tglgt1__render_pipe_profile__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: EU Thread Occupancy */
#define tglgt1__render_pipe_profile__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define tglgt1__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define tglgt1__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define tglgt1__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define tglgt1__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define tglgt1__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define tglgt1__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define tglgt1__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define tglgt1__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define tglgt1__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define tglgt1__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define tglgt1__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define tglgt1__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define tglgt1__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define tglgt1__render_pipe_profile__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define tglgt1__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define tglgt1__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define tglgt1__render_pipe_profile__vs_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define tglgt1__render_pipe_profile__hs_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define tglgt1__render_pipe_profile__ds_bottleneck__read \
   bdw__render_pipe_profile__sf_stall__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define tglgt1__render_pipe_profile__gs_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define tglgt1__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define tglgt1__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define tglgt1__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define tglgt1__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define tglgt1__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define tglgt1__render_pipe_profile__hs_stall__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define tglgt1__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define tglgt1__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define tglgt1__render_pipe_profile__cl_stall__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define tglgt1__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/*
 * tglgt1 Metric set HDCAndSF :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name does not always match the counter being aliased (for
 * example poly_data_ready maps to
 * bdw__render_basic__sampler0_bottleneck__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define tglgt1__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define tglgt1__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define tglgt1__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define tglgt1__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define tglgt1__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define tglgt1__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define tglgt1__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define tglgt1__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define tglgt1__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define tglgt1__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define tglgt1__hdc_and_sf__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define tglgt1__hdc_and_sf__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Thread Occupancy */
#define tglgt1__hdc_and_sf__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define tglgt1__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define tglgt1__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define tglgt1__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define tglgt1__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define tglgt1__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define tglgt1__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define tglgt1__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define tglgt1__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define tglgt1__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define tglgt1__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define tglgt1__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define tglgt1__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define tglgt1__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define tglgt1__hdc_and_sf__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define tglgt1__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3 */
#define tglgt1__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3 */
#define tglgt1__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define tglgt1__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/*
 * tglgt1 Metric set RasterizerAndPixelBackend :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name does not always match the counter being aliased (for
 * example gt_request_queue00_full maps to
 * bdw__render_pipe_profile__cl_stall__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define tglgt1__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define tglgt1__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define tglgt1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define tglgt1__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define tglgt1__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define tglgt1__rasterizer_and_pixel_backend__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define tglgt1__rasterizer_and_pixel_backend__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Thread Occupancy */
#define tglgt1__rasterizer_and_pixel_backend__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define tglgt1__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define tglgt1__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define tglgt1__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define tglgt1__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define tglgt1__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define tglgt1__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define tglgt1__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define tglgt1__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define tglgt1__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define tglgt1__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define tglgt1__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define tglgt1__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define tglgt1__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define tglgt1__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define tglgt1__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define tglgt1__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define tglgt1__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define tglgt1__rasterizer_and_pixel_backend__pixel_data00_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define tglgt1__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define tglgt1__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ00 is full */
#define tglgt1__rasterizer_and_pixel_backend__gt_request_queue00_full__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ01 is full */
#define tglgt1__rasterizer_and_pixel_backend__gt_request_queue01_full__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ10 is full */
#define tglgt1__rasterizer_and_pixel_backend__gt_request_queue10_full__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ11 is full */
#define tglgt1__rasterizer_and_pixel_backend__gt_request_queue11_full__read \
   bdw__render_pipe_profile__hs_stall__read

/*
 * tglgt1 L3_1 :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name does not always match the counter being aliased (for
 * example gs_threads maps to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* L3_1 :: GPU Time Elapsed */
#define tglgt1__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_1 :: GPU Core Clocks */
#define tglgt1__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_1 :: AVG GPU Core Frequency */
#define tglgt1__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_1 :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_1 :: GPU Busy */
#define tglgt1__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_1 :: EU Active */
#define tglgt1__l3_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_1 :: EU Stall */
#define tglgt1__l3_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_1 :: EU Thread Occupancy */
#define tglgt1__l3_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_1 :: VS Threads Dispatched */
#define tglgt1__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_1 :: HS Threads Dispatched */
#define tglgt1__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_1 :: DS Threads Dispatched */
#define tglgt1__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_1 :: GS Threads Dispatched */
#define tglgt1__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_1 :: FS Threads Dispatched */
#define tglgt1__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_1 :: CS Threads Dispatched */
#define tglgt1__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_1 :: Slice0 L3 Bank0 Input Available
 *
 * Averages the four C counters at indices 0..3 (relative to
 * query->c_offset), multiplies by 100 and divides by the L3_1 GPU core
 * clocks reader. Returns 0 when that reader yields 0.
 */
static float
tglgt1__l3_1__l30_bank0_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: C 0 READ C 1 READ FADD C 2 READ FADD C 3 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t c0 = results->accumulator[query->c_offset + 0];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t c3 = results->accumulator[query->c_offset + 3];

   /* The first pair is added as 64-bit integers before widening to double;
    * the remaining two counters are accumulated in double precision.
    */
   double sum = c0 + c1;
   sum = sum + c2;
   sum = sum + c3;

   const double mean = sum / 4.0;

   /* UMUL: the product is stored as an unsigned integer, dropping any
    * fractional part.
    */
   const uint64_t scaled = mean * 100;

   const double clocks = tglgt1__l3_1__gpu_core_clocks__read(perf, query, results);
   const double ratio = clocks ? (double)scaled / clocks : 0;

   return ratio;
}

/* L3_1 :: Slice0 L3 Bank1 Input Available
 *
 * Averages the four C counters at indices 4..7 (relative to
 * query->c_offset), multiplies by 100 and divides by the L3_1 GPU core
 * clocks reader. Returns 0 when that reader yields 0.
 */
static float
tglgt1__l3_1__l30_bank1_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: C 4 READ C 5 READ FADD C 6 READ FADD C 7 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t c4 = results->accumulator[query->c_offset + 4];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   /* The first pair is added as 64-bit integers before widening to double;
    * the remaining two counters are accumulated in double precision.
    */
   double sum = c4 + c5;
   sum = sum + c6;
   sum = sum + c7;

   const double mean = sum / 4.0;

   /* UMUL: the product is stored as an unsigned integer, dropping any
    * fractional part.
    */
   const uint64_t scaled = mean * 100;

   const double clocks = tglgt1__l3_1__gpu_core_clocks__read(perf, query, results);
   const double ratio = clocks ? (double)scaled / clocks : 0;

   return ratio;
}

/*
 * tglgt1 L3_2 :: alias readers.
 *
 * Each macro in this run gives a tglgt1 counter reader name to a reader that
 * is defined elsewhere, so no additional function body is needed here.
 * The target's name does not always match the counter being aliased (for
 * example gs_threads maps to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably gen_perf.py reuses any earlier reader whose
 * equation is identical -- confirm against the generator before "fixing" a
 * seemingly mismatched target.
 */

/* L3_2 :: GPU Time Elapsed */
#define tglgt1__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_2 :: GPU Core Clocks */
#define tglgt1__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_2 :: AVG GPU Core Frequency */
#define tglgt1__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_2 :: AVG GPU Core Frequency (__max variant) */
#define tglgt1__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_2 :: GPU Busy */
#define tglgt1__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_2 :: VS Threads Dispatched */
#define tglgt1__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_2 :: HS Threads Dispatched */
#define tglgt1__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_2 :: DS Threads Dispatched */
#define tglgt1__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_2 :: GS Threads Dispatched */
#define tglgt1__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_2 :: FS Threads Dispatched */
#define tglgt1__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_2 :: CS Threads Dispatched */
#define tglgt1__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_2 :: EU Active */
#define tglgt1__l3_2__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_2 :: EU Stall */
#define tglgt1__l3_2__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_2 :: EU Thread Occupancy */
#define tglgt1__l3_2__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_2 :: Slice0 L3 Bank2 Input Available
 *
 * Adds accumulator slots B3..B0, divides the sum by 4, multiplies by 100
 * (truncating to an integer) and divides by the value returned by the L3_2
 * GpuCoreClocks reader.  Returns 0 when that reader returns 0.
 */
static float
tglgt1__l3_2__l30_bank2_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 3 READ B 2 READ FADD B 1 READ FADD B 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b3 = results->accumulator[query->b_offset + 3];
   const uint64_t b2 = results->accumulator[query->b_offset + 2];
   const uint64_t b1 = results->accumulator[query->b_offset + 1];
   const uint64_t b0 = results->accumulator[query->b_offset + 0];

   /* The first pair is summed as integers; the rest accumulates in double. */
   double total = b3 + b2;
   total += b1;
   total += b0;

   const double mean = total / 4;

   /* UMUL: the scaled value is stored as an integer, dropping any fraction. */
   const uint64_t scaled = mean * 100;

   const double core_clocks = tglgt1__l3_2__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/* L3_2 :: Slice0 L3 Bank3 Input Available
 *
 * Adds accumulator slots B7..B4, divides the sum by 4, multiplies by 100
 * (truncating to an integer) and divides by the value returned by the L3_2
 * GpuCoreClocks reader.  Returns 0 when that reader returns 0.
 */
static float
tglgt1__l3_2__l30_bank3_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   /* RPN equation: B 7 READ B 6 READ FADD B 5 READ FADD B 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t b7 = results->accumulator[query->b_offset + 7];
   const uint64_t b6 = results->accumulator[query->b_offset + 6];
   const uint64_t b5 = results->accumulator[query->b_offset + 5];
   const uint64_t b4 = results->accumulator[query->b_offset + 4];

   /* The first pair is summed as integers; the rest accumulates in double. */
   double total = b7 + b6;
   total += b5;
   total += b4;

   const double mean = total / 4;

   /* UMUL: the scaled value is stored as an integer, dropping any fraction. */
   const uint64_t scaled = mean * 100;

   const double core_clocks = tglgt1__l3_2__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/*
 * L3_3 metric set -- reader aliases.
 *
 * Each macro in this group maps an L3_3 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): some targets are named after a different counter (e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read); presumably the
 * generator reuses an existing reader with an identical equation -- confirm
 * against gen_perf.py before changing a target.
 */

/* L3_3 :: GPU Time Elapsed */
#define tglgt1__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_3 :: GPU Core Clocks */
#define tglgt1__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_3 :: AVG GPU Core Frequency */
#define tglgt1__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_3 :: AVG GPU Core Frequency (max) */
#define tglgt1__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_3 :: GPU Busy */
#define tglgt1__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_3 :: VS Threads Dispatched */
#define tglgt1__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_3 :: HS Threads Dispatched */
#define tglgt1__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_3 :: DS Threads Dispatched */
#define tglgt1__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_3 :: GS Threads Dispatched */
#define tglgt1__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_3 :: FS Threads Dispatched */
#define tglgt1__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_3 :: CS Threads Dispatched */
#define tglgt1__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_3 :: EU Active */
#define tglgt1__l3_3__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_3 :: EU Stall */
#define tglgt1__l3_3__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_3 :: EU Thread Occupancy */
#define tglgt1__l3_3__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_3 :: Slice0 L3 Bank0 Output Ready
 *
 * Adds accumulator slots C7 and C6, divides the sum by 4, multiplies by 100
 * (truncating to an integer) and divides by the value returned by the L3_3
 * GpuCoreClocks reader.  Returns 0 when that reader returns 0.
 */
static float
tglgt1__l3_3__l30_bank0_output_ready__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 7 READ C 6 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];

   /* The pair is summed as integers before being widened to double. */
   const double total = c7 + c6;
   const double quarter = total / 4;

   /* UMUL: the scaled value is stored as an integer, dropping any fraction. */
   const uint64_t scaled = quarter * 100;

   const double core_clocks = tglgt1__l3_3__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/*
 * L3_4 metric set -- reader aliases.
 *
 * Each macro in this group maps an L3_4 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): some targets are named after a different counter (e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read); presumably the
 * generator reuses an existing reader with an identical equation -- confirm
 * against gen_perf.py before changing a target.
 */

/* L3_4 :: GPU Time Elapsed */
#define tglgt1__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_4 :: GPU Core Clocks */
#define tglgt1__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_4 :: AVG GPU Core Frequency */
#define tglgt1__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_4 :: AVG GPU Core Frequency (max) */
#define tglgt1__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_4 :: GPU Busy */
#define tglgt1__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_4 :: VS Threads Dispatched */
#define tglgt1__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_4 :: HS Threads Dispatched */
#define tglgt1__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_4 :: DS Threads Dispatched */
#define tglgt1__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_4 :: GS Threads Dispatched */
#define tglgt1__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_4 :: FS Threads Dispatched */
#define tglgt1__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_4 :: CS Threads Dispatched */
#define tglgt1__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_4 :: EU Active */
#define tglgt1__l3_4__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_4 :: EU Stall */
#define tglgt1__l3_4__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_4 :: EU Thread Occupancy */
#define tglgt1__l3_4__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_4 :: Slice0 L3 Bank1 Output Ready
 *
 * Adds accumulator slots C5 and C4, divides the sum by 4, multiplies by 100
 * (truncating to an integer) and divides by the value returned by the L3_4
 * GpuCoreClocks reader.  Returns 0 when that reader returns 0.
 */
static float
tglgt1__l3_4__l30_bank1_output_ready__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: C 5 READ C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   /* The pair is summed as integers before being widened to double. */
   const double total = c5 + c4;
   const double quarter = total / 4;

   /* UMUL: the scaled value is stored as an integer, dropping any fraction. */
   const uint64_t scaled = quarter * 100;

   const double core_clocks = tglgt1__l3_4__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/*
 * L3_5 metric set -- reader aliases.
 *
 * Each macro in this group maps an L3_5 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): some targets are named after a different counter (e.g.
 * l30_bank2_output_ready -> tglgt1__l3_4__l30_bank1_output_ready__read);
 * presumably the generator reuses an existing reader with an identical
 * equation -- confirm against gen_perf.py before changing a target.
 */

/* L3_5 :: GPU Time Elapsed */
#define tglgt1__l3_5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_5 :: GPU Core Clocks */
#define tglgt1__l3_5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_5 :: AVG GPU Core Frequency */
#define tglgt1__l3_5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_5 :: AVG GPU Core Frequency (max) */
#define tglgt1__l3_5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_5 :: GPU Busy */
#define tglgt1__l3_5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_5 :: VS Threads Dispatched */
#define tglgt1__l3_5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_5 :: HS Threads Dispatched */
#define tglgt1__l3_5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_5 :: DS Threads Dispatched */
#define tglgt1__l3_5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_5 :: GS Threads Dispatched */
#define tglgt1__l3_5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_5 :: FS Threads Dispatched */
#define tglgt1__l3_5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_5 :: CS Threads Dispatched */
#define tglgt1__l3_5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_5 :: EU Active */
#define tglgt1__l3_5__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_5 :: EU Stall */
#define tglgt1__l3_5__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_5 :: EU Thread Occupancy */
#define tglgt1__l3_5__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_5 :: Slice0 L3 Bank2 Output Ready */
#define tglgt1__l3_5__l30_bank2_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * L3_6 metric set -- reader aliases.
 *
 * Each macro in this group maps an L3_6 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): some targets are named after a different counter (e.g.
 * l30_bank3_output_ready -> tglgt1__l3_4__l30_bank1_output_ready__read);
 * presumably the generator reuses an existing reader with an identical
 * equation -- confirm against gen_perf.py before changing a target.
 */

/* L3_6 :: GPU Time Elapsed */
#define tglgt1__l3_6__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_6 :: GPU Core Clocks */
#define tglgt1__l3_6__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_6 :: AVG GPU Core Frequency */
#define tglgt1__l3_6__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_6 :: AVG GPU Core Frequency (max) */
#define tglgt1__l3_6__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_6 :: GPU Busy */
#define tglgt1__l3_6__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_6 :: VS Threads Dispatched */
#define tglgt1__l3_6__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_6 :: HS Threads Dispatched */
#define tglgt1__l3_6__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_6 :: DS Threads Dispatched */
#define tglgt1__l3_6__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_6 :: GS Threads Dispatched */
#define tglgt1__l3_6__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_6 :: FS Threads Dispatched */
#define tglgt1__l3_6__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_6 :: CS Threads Dispatched */
#define tglgt1__l3_6__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_6 :: EU Active */
#define tglgt1__l3_6__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_6 :: EU Stall */
#define tglgt1__l3_6__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_6 :: EU Thread Occupancy */
#define tglgt1__l3_6__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_6 :: Slice0 L3 Bank3 Output Ready */
#define tglgt1__l3_6__l30_bank3_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * Sampler_1 metric set -- reader aliases.
 *
 * Each macro in this group maps a Sampler_1 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * sampler00_output_ready -> bdw__render_basic__sampler1_bottleneck__read);
 * presumably the generator reuses an existing reader with an identical
 * equation -- confirm against gen_perf.py before changing a target.
 */

/* Sampler_1 :: GPU Time Elapsed */
#define tglgt1__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Sampler_1 :: GPU Core Clocks */
#define tglgt1__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Sampler_1 :: AVG GPU Core Frequency */
#define tglgt1__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_1 :: AVG GPU Core Frequency (max) */
#define tglgt1__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_1 :: GPU Busy */
#define tglgt1__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Sampler_1 :: VS Threads Dispatched */
#define tglgt1__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Sampler_1 :: HS Threads Dispatched */
#define tglgt1__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Sampler_1 :: DS Threads Dispatched */
#define tglgt1__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Sampler_1 :: GS Threads Dispatched */
#define tglgt1__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Sampler_1 :: FS Threads Dispatched */
#define tglgt1__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Sampler_1 :: CS Threads Dispatched */
#define tglgt1__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Sampler_1 :: EU Active */
#define tglgt1__sampler_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Sampler_1 :: EU Stall */
#define tglgt1__sampler_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Sampler_1 :: EU Thread Occupancy */
#define tglgt1__sampler_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler_1 :: Slice0 DualSubslice0 Input Available */
#define tglgt1__sampler_1__sampler00_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Sampler_1 :: Slice0 DualSubslice1 Input Available */
#define tglgt1__sampler_1__sampler01_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Sampler_1 :: Slice0 DualSubslice0 Sampler Output Ready */
#define tglgt1__sampler_1__sampler00_output_ready__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice1 Sampler Output Ready */
#define tglgt1__sampler_1__sampler01_output_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/*
 * TDL_1 metric set -- reader aliases.
 *
 * Each macro in this group maps a TDL_1 reader name onto another reader
 * instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * thread_header00_ready_port0 ->
 * bdw__render_pipe_profile__bc_bottleneck__read); presumably the generator
 * reuses an existing reader with an identical equation -- confirm against
 * gen_perf.py before changing a target.
 */

/* TDL_1 :: GPU Time Elapsed */
#define tglgt1__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* TDL_1 :: GPU Core Clocks */
#define tglgt1__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* TDL_1 :: AVG GPU Core Frequency */
#define tglgt1__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1 :: AVG GPU Core Frequency (max) */
#define tglgt1__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1 :: GPU Busy */
#define tglgt1__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* TDL_1 :: VS Threads Dispatched */
#define tglgt1__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* TDL_1 :: HS Threads Dispatched */
#define tglgt1__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* TDL_1 :: DS Threads Dispatched */
#define tglgt1__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* TDL_1 :: GS Threads Dispatched */
#define tglgt1__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* TDL_1 :: FS Threads Dispatched */
#define tglgt1__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* TDL_1 :: CS Threads Dispatched */
#define tglgt1__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* TDL_1 :: EU Active */
#define tglgt1__tdl_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* TDL_1 :: EU Stall */
#define tglgt1__tdl_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* TDL_1 :: EU Thread Occupancy */
#define tglgt1__tdl_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher */
#define tglgt1__tdl_1__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher */
#define tglgt1__tdl_1__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher */
#define tglgt1__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher */
#define tglgt1__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0 */
#define tglgt1__tdl_1__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1 */
#define tglgt1__tdl_1__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2 */
#define tglgt1__tdl_1__thread_header00_ready_port2__read \
   bdw__render_pipe_profile__sf_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3 */
#define tglgt1__tdl_1__thread_header00_ready_port3__read \
   bdw__render_pipe_profile__cl_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0 */
#define tglgt1__tdl_1__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1 */
#define tglgt1__tdl_1__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2 */
#define tglgt1__tdl_1__thread_header01_ready_port2__read \
   bdw__render_pipe_profile__hs_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3 */
#define tglgt1__tdl_1__thread_header01_ready_port3__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher */
#define tglgt1__tdl_1__thread_header00_ready__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher */
#define tglgt1__tdl_1__thread_header01_ready__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/*
 * GpuBusyness metric set -- reader aliases.
 *
 * Each macro in this group maps a GpuBusyness reader name onto another
 * reader instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * render_busy -> bdw__render_pipe_profile__hi_depth_bottleneck__read);
 * presumably the generator reuses an existing reader with an identical
 * equation -- confirm against gen_perf.py before changing a target.
 */

/* GpuBusyness :: GPU Time Elapsed */
#define tglgt1__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* GpuBusyness :: GPU Core Clocks */
#define tglgt1__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* GpuBusyness :: AVG GPU Core Frequency */
#define tglgt1__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* GpuBusyness :: AVG GPU Core Frequency (max) */
#define tglgt1__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* GpuBusyness :: GPU Busy */
#define tglgt1__gpu_busyness__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* GpuBusyness :: EU Active */
#define tglgt1__gpu_busyness__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* GpuBusyness :: EU Stall */
#define tglgt1__gpu_busyness__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* GpuBusyness :: EU Thread Occupancy */
#define tglgt1__gpu_busyness__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* GpuBusyness :: VS Threads Dispatched */
#define tglgt1__gpu_busyness__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* GpuBusyness :: HS Threads Dispatched */
#define tglgt1__gpu_busyness__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* GpuBusyness :: DS Threads Dispatched */
#define tglgt1__gpu_busyness__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* GpuBusyness :: GS Threads Dispatched */
#define tglgt1__gpu_busyness__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* GpuBusyness :: FS Threads Dispatched */
#define tglgt1__gpu_busyness__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* GpuBusyness :: CS Threads Dispatched */
#define tglgt1__gpu_busyness__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* GpuBusyness :: Render Ring Busy */
#define tglgt1__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* GpuBusyness :: Compute Ring Busy */
#define tglgt1__gpu_busyness__compute_busy__read \
   bdw__render_pipe_profile__so_stall__read

/* GpuBusyness :: Posh Ring Busy */
#define tglgt1__gpu_busyness__posh_engine_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* GpuBusyness :: Blitter Ring Busy */
#define tglgt1__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* GpuBusyness :: Vebox Ring Busy */
#define tglgt1__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* GpuBusyness :: Vdbox0 Ring Busy */
#define tglgt1__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* GpuBusyness :: Vdbox1 Ring Busy */
#define tglgt1__gpu_busyness__vdbox1_busy__read \
   bdw__render_pipe_profile__ds_stall__read

/* GpuBusyness :: Render and compute engines are simultaneously busy */
#define tglgt1__gpu_busyness__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* GpuBusyness :: Any Engine Busy */
#define tglgt1__gpu_busyness__any_engine_busy__read \
   bdw__render_basic__sampler1_busy__read

/*
 * EuActivity1 metric set -- reader aliases.
 *
 * Each macro in this group maps an EuActivity1 reader name onto another
 * reader instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * vs_fpu_active -> tglgt1__render_basic__eu_active__read); presumably the
 * generator reuses an existing reader with an identical equation -- confirm
 * against gen_perf.py before changing a target.
 */

/* EuActivity1 :: GPU Time Elapsed */
#define tglgt1__eu_activity1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity1 :: GPU Core Clocks */
#define tglgt1__eu_activity1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity1 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity1 :: AVG GPU Core Frequency (max) */
#define tglgt1__eu_activity1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity1 :: GPU Busy */
#define tglgt1__eu_activity1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity1 :: VS Threads Dispatched */
#define tglgt1__eu_activity1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity1 :: HS Threads Dispatched */
#define tglgt1__eu_activity1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity1 :: DS Threads Dispatched */
#define tglgt1__eu_activity1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity1 :: GS Threads Dispatched */
#define tglgt1__eu_activity1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity1 :: FS Threads Dispatched */
#define tglgt1__eu_activity1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity1 :: CS Threads Dispatched */
#define tglgt1__eu_activity1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity1 :: Render Ring Busy */
#define tglgt1__eu_activity1__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity1 :: Compute Ring Busy */
#define tglgt1__eu_activity1__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity1 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity1__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity1 :: GTI Read Throughput */
#define tglgt1__eu_activity1__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity1 :: GTI Write Throughput */
#define tglgt1__eu_activity1__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity1 :: VS FPU Pipe Active */
#define tglgt1__eu_activity1__vs_fpu_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity1 :: PS FPU Pipe Active */
#define tglgt1__eu_activity1__ps_fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity1 :: EU Send Pipe Active
 *
 * Adds accumulator slots A15..A18, truncates the sum to an integer, performs
 * an integer division by perf->sys_vars.n_eus (0 if that count is 0),
 * multiplies by 100 and divides by the value returned by the EuActivity1
 * GpuCoreClocks reader.  Returns 0 when that reader returns 0.
 */
static float
tglgt1__eu_activity1__eu_send_active__read(UNUSED struct intel_perf_config *perf,
                                           const struct intel_perf_query_info *query,
                                           const struct intel_perf_query_result *results)
{
   /* RPN equation: A 15 READ A 16 READ FADD A 17 READ FADD A 18 READ FADD $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t a15 = results->accumulator[query->a_offset + 15];
   const uint64_t a16 = results->accumulator[query->a_offset + 16];
   const uint64_t a17 = results->accumulator[query->a_offset + 17];
   const uint64_t a18 = results->accumulator[query->a_offset + 18];

   /* The first pair is summed as integers; the rest accumulates in double. */
   double total = a15 + a16;
   total += a17;
   total += a18;

   /* UDIV works on integers: truncate the sum, then guard the divisor. */
   const uint64_t total_int = total;
   const uint64_t eu_count = perf->sys_vars.n_eus;
   uint64_t per_eu = 0;
   if (eu_count)
      per_eu = total_int / eu_count;

   const uint64_t scaled = per_eu * 100;

   const double core_clocks = tglgt1__eu_activity1__gpu_core_clocks__read(perf, query, results);
   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/*
 * EuActivity2 metric set -- reader aliases.
 *
 * Each macro in this group maps an EuActivity2 reader name onto another
 * reader instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * cs_em_active -> tglgt1__eu_activity1__eu_send_active__read); presumably
 * the generator reuses an existing reader with an identical equation --
 * confirm against gen_perf.py before changing a target.
 */

/* EuActivity2 :: GPU Time Elapsed */
#define tglgt1__eu_activity2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity2 :: GPU Core Clocks */
#define tglgt1__eu_activity2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity2 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity2 :: AVG GPU Core Frequency (max) */
#define tglgt1__eu_activity2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity2 :: GPU Busy */
#define tglgt1__eu_activity2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity2 :: VS Threads Dispatched */
#define tglgt1__eu_activity2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity2 :: HS Threads Dispatched */
#define tglgt1__eu_activity2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity2 :: DS Threads Dispatched */
#define tglgt1__eu_activity2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity2 :: GS Threads Dispatched */
#define tglgt1__eu_activity2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity2 :: FS Threads Dispatched */
#define tglgt1__eu_activity2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity2 :: CS Threads Dispatched */
#define tglgt1__eu_activity2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity2 :: Render Ring Busy */
#define tglgt1__eu_activity2__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity2 :: Compute Ring Busy */
#define tglgt1__eu_activity2__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity2 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity2__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity2 :: GTI Read Throughput */
#define tglgt1__eu_activity2__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity2 :: GTI Write Throughput */
#define tglgt1__eu_activity2__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity2 :: CS EM Pipe Active */
#define tglgt1__eu_activity2__cs_em_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/* EuActivity2 :: CS FPU Pipe Active */
#define tglgt1__eu_activity2__cs_fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity2 :: CS Send Pipeline Active */
#define tglgt1__eu_activity2__cs_send_active__read \
   tglgt1__render_basic__eu_active__read

/*
 * EuActivity3 metric set -- reader aliases.
 *
 * Each macro in this group maps an EuActivity3 reader name onto another
 * reader instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * ps_send_active -> tglgt1__eu_activity1__eu_send_active__read); presumably
 * the generator reuses an existing reader with an identical equation --
 * confirm against gen_perf.py before changing a target.
 */

/* EuActivity3 :: GPU Time Elapsed */
#define tglgt1__eu_activity3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity3 :: GPU Core Clocks */
#define tglgt1__eu_activity3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity3 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity3 :: AVG GPU Core Frequency (max) */
#define tglgt1__eu_activity3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity3 :: GPU Busy */
#define tglgt1__eu_activity3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity3 :: VS Threads Dispatched */
#define tglgt1__eu_activity3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity3 :: HS Threads Dispatched */
#define tglgt1__eu_activity3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity3 :: DS Threads Dispatched */
#define tglgt1__eu_activity3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity3 :: GS Threads Dispatched */
#define tglgt1__eu_activity3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity3 :: FS Threads Dispatched */
#define tglgt1__eu_activity3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity3 :: CS Threads Dispatched */
#define tglgt1__eu_activity3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity3 :: Render Ring Busy */
#define tglgt1__eu_activity3__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity3 :: Compute Ring Busy */
#define tglgt1__eu_activity3__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity3 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity3__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity3 :: GTI Read Throughput */
#define tglgt1__eu_activity3__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity3 :: GTI Write Throughput */
#define tglgt1__eu_activity3__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity3 :: VS EM Pipe Active */
#define tglgt1__eu_activity3__vs_em_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity3 :: PS EM Pipe Active */
#define tglgt1__eu_activity3__ps_em_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity3 :: PS Send Pipeline Active */
#define tglgt1__eu_activity3__ps_send_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/*
 * EuActivity4 metric set -- reader aliases.
 *
 * Each macro in this group maps an EuActivity4 reader name onto another
 * reader instead of defining a new function.
 * NOTE(review): several targets are named after a different counter (e.g.
 * vs_send_active -> tglgt1__eu_activity1__eu_send_active__read); presumably
 * the generator reuses an existing reader with an identical equation --
 * confirm against gen_perf.py before changing a target.
 */

/* EuActivity4 :: GPU Time Elapsed */
#define tglgt1__eu_activity4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity4 :: GPU Core Clocks */
#define tglgt1__eu_activity4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity4 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity4 :: AVG GPU Core Frequency (max) */
#define tglgt1__eu_activity4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity4 :: GPU Busy */
#define tglgt1__eu_activity4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity4 :: VS Threads Dispatched */
#define tglgt1__eu_activity4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity4 :: HS Threads Dispatched */
#define tglgt1__eu_activity4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity4 :: DS Threads Dispatched */
#define tglgt1__eu_activity4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity4 :: GS Threads Dispatched */
#define tglgt1__eu_activity4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity4 :: FS Threads Dispatched */
#define tglgt1__eu_activity4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity4 :: CS Threads Dispatched */
#define tglgt1__eu_activity4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity4 :: Render Ring Busy */
#define tglgt1__eu_activity4__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity4 :: Compute Ring Busy */
#define tglgt1__eu_activity4__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity4 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity4__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity4 :: GTI Read Throughput */
#define tglgt1__eu_activity4__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity4 :: GTI Write Throughput */
#define tglgt1__eu_activity4__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity4 :: HS FPU Pipe Active */
#define tglgt1__eu_activity4__hs_fpu_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity4 :: DS FPU Pipe Active */
#define tglgt1__eu_activity4__ds_fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity4 :: VS Send Pipe Active */
#define tglgt1__eu_activity4__vs_send_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt1 :: EuActivity5 counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* EuActivity5 :: GPU Time Elapsed */
#define tglgt1__eu_activity5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity5 :: GPU Core Clocks */
#define tglgt1__eu_activity5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity5 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity5 :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt1__eu_activity5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity5 :: GPU Busy */
#define tglgt1__eu_activity5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity5 :: VS Threads Dispatched */
#define tglgt1__eu_activity5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity5 :: HS Threads Dispatched */
#define tglgt1__eu_activity5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity5 :: DS Threads Dispatched */
#define tglgt1__eu_activity5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity5 :: GS Threads Dispatched */
#define tglgt1__eu_activity5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity5 :: FS Threads Dispatched */
#define tglgt1__eu_activity5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity5 :: CS Threads Dispatched */
#define tglgt1__eu_activity5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity5 :: Render Ring Busy */
#define tglgt1__eu_activity5__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity5 :: Compute Ring Busy */
#define tglgt1__eu_activity5__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity5 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity5__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity5 :: GTI Read Throughput */
#define tglgt1__eu_activity5__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity5 :: GTI Write Throughput */
#define tglgt1__eu_activity5__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity5 :: HS EM Pipe Active */
#define tglgt1__eu_activity5__hs_em_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity5 :: DS EM Pipe Active */
#define tglgt1__eu_activity5__ds_em_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity5 :: HS Send Pipe Active */
#define tglgt1__eu_activity5__hs_send_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt1 :: EuActivity6 counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* EuActivity6 :: GPU Time Elapsed */
#define tglgt1__eu_activity6__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity6 :: GPU Core Clocks */
#define tglgt1__eu_activity6__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity6 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity6__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity6 :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt1__eu_activity6__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity6 :: GPU Busy */
#define tglgt1__eu_activity6__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity6 :: VS Threads Dispatched */
#define tglgt1__eu_activity6__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity6 :: HS Threads Dispatched */
#define tglgt1__eu_activity6__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity6 :: DS Threads Dispatched */
#define tglgt1__eu_activity6__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity6 :: GS Threads Dispatched */
#define tglgt1__eu_activity6__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity6 :: FS Threads Dispatched */
#define tglgt1__eu_activity6__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity6 :: CS Threads Dispatched */
#define tglgt1__eu_activity6__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity6 :: Render Ring Busy */
#define tglgt1__eu_activity6__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity6 :: Compute Ring Busy */
#define tglgt1__eu_activity6__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity6 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity6__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity6 :: GTI Read Throughput */
#define tglgt1__eu_activity6__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity6 :: GTI Write Throughput */
#define tglgt1__eu_activity6__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity6 :: GS FPU Pipe Active */
#define tglgt1__eu_activity6__gs_fpu_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity6 :: GS EM Pipe Active */
#define tglgt1__eu_activity6__gs_em_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity6 :: GS Send Pipe Active */
#define tglgt1__eu_activity6__gs_send_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt1 :: EuActivity7 counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* EuActivity7 :: GPU Time Elapsed */
#define tglgt1__eu_activity7__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity7 :: GPU Core Clocks */
#define tglgt1__eu_activity7__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity7 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity7__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity7 :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt1__eu_activity7__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity7 :: GPU Busy */
#define tglgt1__eu_activity7__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity7 :: VS Threads Dispatched */
#define tglgt1__eu_activity7__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity7 :: HS Threads Dispatched */
#define tglgt1__eu_activity7__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity7 :: DS Threads Dispatched */
#define tglgt1__eu_activity7__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity7 :: GS Threads Dispatched */
#define tglgt1__eu_activity7__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity7 :: FS Threads Dispatched */
#define tglgt1__eu_activity7__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity7 :: CS Threads Dispatched */
#define tglgt1__eu_activity7__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity7 :: EU FPU Pipe Active */
#define tglgt1__eu_activity7__fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity7 :: EM Pipe Active */
#define tglgt1__eu_activity7__em_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/* EuActivity7 :: EU FPU And EM Pipes Active */
#define tglgt1__eu_activity7__eu_fpu_em_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity7 :: EU AVG IPC Rate
 *
 * Evaluates 1 + S0 / ((S1 + S2) - S0), where S0, S1 and S2 are the sums of
 * the A counters 7..10, 11..14 and 15..18 respectively.  The quotient is
 * taken as 0 when the denominator is zero.  `perf` is not used.
 */
static float
tglgt1__eu_activity7__eu_avg_ipc_rate__read(UNUSED struct intel_perf_config *perf,
                                            const struct intel_perf_query_info *query,
                                            const struct intel_perf_query_result *results)
{
   /* RPN equation: A 7 READ A 8 READ FADD A 9 READ FADD A 10 READ FADD  A 11 READ A 12 READ FADD A 13 READ FADD A 14 READ FADD  A 15 READ A 16 READ FADD A 17 READ FADD A 18 READ FADD FADD  A 7 READ A 8 READ FADD A 9 READ FADD A 10 READ FADD FSUB FDIV 1 FADD */
   double group_sum[3];

   for (int g = 0; g < 3; g++) {
      const int first = 7 + 4 * g;
      const uint64_t c0 = results->accumulator[query->a_offset + first];
      const uint64_t c1 = results->accumulator[query->a_offset + first + 1];
      const uint64_t c2 = results->accumulator[query->a_offset + first + 2];
      const uint64_t c3 = results->accumulator[query->a_offset + first + 3];

      /* The first pair is added as 64-bit integers before widening to
       * double; the remaining two counters are accumulated in double.
       */
      double sum = c0 + c1;
      sum = sum + c2;
      sum = sum + c3;
      group_sum[g] = sum;
   }

   /* The equation reads A7..A10 a second time for the subtraction; that
    * sum is identical to group_sum[0], so it is reused here.
    */
   const double numerator = group_sum[0];
   const double denominator = (group_sum[1] + group_sum[2]) - group_sum[0];

   double ratio = 0;
   if (denominator)
      ratio = numerator / denominator;

   const double ipc = ratio + 1;

   return ipc;
}

/*
 * The tglgt1 EuActivity7 macros below are pure aliases: each one maps a
 * per-metric-set accessor name onto an existing function rather than
 * defining a new one.  The targets are defined outside this group.  Where a
 * target's name refers to a different counter than the comment above the
 * macro, the generator has presumably reused a function whose equation is
 * identical -- TODO confirm before "fixing" any such name.
 */

/* EuActivity7 :: Render Ring Busy */
#define tglgt1__eu_activity7__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity7 :: Compute Ring Busy */
#define tglgt1__eu_activity7__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity7 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity7__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity7 :: GTI Read Throughput */
#define tglgt1__eu_activity7__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity7 :: GTI Write Throughput */
#define tglgt1__eu_activity7__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/*
 * tglgt1 :: EuActivity8 counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* EuActivity8 :: GPU Time Elapsed */
#define tglgt1__eu_activity8__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity8 :: GPU Core Clocks */
#define tglgt1__eu_activity8__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity8 :: AVG GPU Core Frequency */
#define tglgt1__eu_activity8__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity8 :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt1__eu_activity8__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity8 :: GPU Busy */
#define tglgt1__eu_activity8__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity8 :: VS Threads Dispatched */
#define tglgt1__eu_activity8__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity8 :: HS Threads Dispatched */
#define tglgt1__eu_activity8__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity8 :: DS Threads Dispatched */
#define tglgt1__eu_activity8__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity8 :: GS Threads Dispatched */
#define tglgt1__eu_activity8__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity8 :: FS Threads Dispatched */
#define tglgt1__eu_activity8__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity8 :: CS Threads Dispatched */
#define tglgt1__eu_activity8__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity8 :: Render Ring Busy */
#define tglgt1__eu_activity8__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity8 :: Compute Ring Busy */
#define tglgt1__eu_activity8__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity8 :: Render and compute engines are simultaneously busy */
#define tglgt1__eu_activity8__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity8 :: GTI Read Throughput */
#define tglgt1__eu_activity8__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity8 :: GTI Write Throughput */
#define tglgt1__eu_activity8__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity8 :: DS Send Pipe Active */
#define tglgt1__eu_activity8__ds_send_active__read \
   tglgt1__render_basic__eu_active__read

/*
 * tglgt1 :: TestOa counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  The TestCounterN aliases all
 * point at hsw__compute_extended__* functions whose names describe other
 * counters; presumably the generator reused functions with identical
 * equations -- TODO confirm before "fixing" any such name.
 */

/* Metric set TestOa :: GPU Time Elapsed */
#define tglgt1__test_oa__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set TestOa :: GPU Core Clocks */
#define tglgt1__test_oa__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set TestOa :: AVG GPU Core Frequency */
#define tglgt1__test_oa__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TestOa :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt1__test_oa__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TestOa :: TestCounter0 */
#define tglgt1__test_oa__counter0__read \
   hsw__compute_extended__eu_untyped_reads0__read

/* Metric set TestOa :: TestCounter1 */
#define tglgt1__test_oa__counter1__read \
   hsw__compute_extended__eu_untyped_writes0__read

/* Metric set TestOa :: TestCounter2 */
#define tglgt1__test_oa__counter2__read \
   hsw__compute_extended__eu_typed_reads0__read

/* Metric set TestOa :: TestCounter3 */
#define tglgt1__test_oa__counter3__read \
   hsw__compute_extended__eu_typed_writes0__read

/* Metric set TestOa :: TestCounter4 */
#define tglgt1__test_oa__counter4__read \
   hsw__compute_extended__eu_untyped_atomics0__read

/* Metric set TestOa :: TestCounter5 */
#define tglgt1__test_oa__counter5__read \
   hsw__compute_extended__eu_typed_atomics0__read

/* Metric set TestOa :: TestCounter6 */
#define tglgt1__test_oa__counter6__read \
   hsw__compute_extended__eu_urb_atomics0__read

/* Metric set TestOa :: TestCounter7 */
#define tglgt1__test_oa__counter7__read \
   hsw__compute_extended__gpu_clocks__read

/* Metric set TestOa :: TestCounter8 */
#define tglgt1__test_oa__counter8__read \
   hsw__compute_extended__typed_writes0__read

/* Metric set TestOa :: TestCounter9 - OAR enable */
#define tglgt1__test_oa__counter9__read \
   hsw__compute_extended__untyped_writes0__read

/*
 * tglgt2 :: Render Metrics Basic set counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define tglgt2__render_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define tglgt2__render_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency */
#define tglgt2__render_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt2__render_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define tglgt2__render_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define tglgt2__render_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define tglgt2__render_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define tglgt2__render_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define tglgt2__render_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define tglgt2__render_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define tglgt2__render_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define tglgt2__render_basic__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define tglgt2__render_basic__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Thread Occupancy */
#define tglgt2__render_basic__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Render Metrics Basic set :: Sampler00 Busy */
#define tglgt2__render_basic__sampler00_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler Slice0 Dualsubslice0 is bottleneck */
#define tglgt2__render_basic__sampler00_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define tglgt2__render_basic__samplers_busy__read \
   bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define tglgt2__render_basic__sampler_bottleneck__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define tglgt2__render_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define tglgt2__render_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define tglgt2__render_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define tglgt2__render_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define tglgt2__render_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define tglgt2__render_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define tglgt2__render_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define tglgt2__render_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define tglgt2__render_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define tglgt2__render_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define tglgt2__render_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define tglgt2__render_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define tglgt2__render_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define tglgt2__render_basic__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define tglgt2__render_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define tglgt2__render_basic__gti_read_throughput__read \
   tglgt1__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define tglgt2__render_basic__gti_write_throughput__read \
   tglgt1__render_basic__gti_write_throughput__read

/*
 * tglgt2 :: Compute Metrics Basic set counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Where a target's name refers
 * to a different counter than the comment above the macro, the generator has
 * presumably reused a function whose equation is identical -- TODO confirm
 * before "fixing" any such name.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define tglgt2__compute_basic__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define tglgt2__compute_basic__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency */
#define tglgt2__compute_basic__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt2__compute_basic__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define tglgt2__compute_basic__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define tglgt2__compute_basic__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define tglgt2__compute_basic__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define tglgt2__compute_basic__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define tglgt2__compute_basic__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define tglgt2__compute_basic__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define tglgt2__compute_basic__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define tglgt2__compute_basic__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define tglgt2__compute_basic__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define tglgt2__compute_basic__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define tglgt2__compute_basic__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define tglgt2__compute_basic__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define tglgt2__compute_basic__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define tglgt2__compute_basic__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define tglgt2__compute_basic__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define tglgt2__compute_basic__samples_written__read \
   bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define tglgt2__compute_basic__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define tglgt2__compute_basic__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define tglgt2__compute_basic__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define tglgt2__compute_basic__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define tglgt2__compute_basic__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define tglgt2__compute_basic__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define tglgt2__compute_basic__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define tglgt2__compute_basic__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define tglgt2__compute_basic__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: GTI Read Throughput */
#define tglgt2__compute_basic__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput */
#define tglgt2__compute_basic__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/*
 * tglgt2 :: Render Metrics set for 3D Pipeline Profile counter accessors.
 *
 * Each macro in this group is a pure alias: it maps a per-metric-set
 * accessor name onto an existing function rather than defining a new one.
 * The targets are defined outside this group.  Many of the bottleneck/stall
 * aliases point at bdw__* functions named after a different pipeline stage;
 * presumably the generator reused functions whose equations are identical
 * -- TODO confirm before "fixing" any such name.
 */

/* Render Metrics set for 3D Pipeline Profile :: GPU Time Elapsed */
#define tglgt2__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics set for 3D Pipeline Profile :: GPU Core Clocks */
#define tglgt2__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define tglgt2__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics set for 3D Pipeline Profile :: AVG GPU Core Frequency (alias for the __max accessor) */
#define tglgt2__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics set for 3D Pipeline Profile :: GPU Busy */
#define tglgt2__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics set for 3D Pipeline Profile :: VS Threads Dispatched */
#define tglgt2__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: HS Threads Dispatched */
#define tglgt2__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: DS Threads Dispatched */
#define tglgt2__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Render Metrics set for 3D Pipeline Profile :: GS Threads Dispatched */
#define tglgt2__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: FS Threads Dispatched */
#define tglgt2__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics set for 3D Pipeline Profile :: CS Threads Dispatched */
#define tglgt2__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics set for 3D Pipeline Profile :: EU Active */
#define tglgt2__render_pipe_profile__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Render Metrics set for 3D Pipeline Profile :: EU Stall */
#define tglgt2__render_pipe_profile__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Render Metrics set for 3D Pipeline Profile :: EU Thread Occupancy */
#define tglgt2__render_pipe_profile__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Render Metrics set for 3D Pipeline Profile :: Rasterized Pixels */
#define tglgt2__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics set for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define tglgt2__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Early Depth Test Fails */
#define tglgt2__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Killed in FS */
#define tglgt2__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics set for 3D Pipeline Profile :: Pixels Failing Tests */
#define tglgt2__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Written */
#define tglgt2__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics set for 3D Pipeline Profile :: Samples Blended */
#define tglgt2__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels */
#define tglgt2__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics set for 3D Pipeline Profile :: Sampler Texels Misses */
#define tglgt2__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Read */
#define tglgt2__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics set for 3D Pipeline Profile :: SLM Bytes Written */
#define tglgt2__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Memory Accesses */
#define tglgt2__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define tglgt2__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics set for 3D Pipeline Profile :: L3 Shader Throughput */
#define tglgt2__render_pipe_profile__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics set for 3D Pipeline Profile :: Shader Barrier Messages */
#define tglgt2__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics set for 3D Pipeline Profile :: VF Bottleneck */
#define tglgt2__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: VS Bottleneck */
#define tglgt2__render_pipe_profile__vs_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: HS Bottleneck */
#define tglgt2__render_pipe_profile__hs_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Bottleneck */
#define tglgt2__render_pipe_profile__ds_bottleneck__read \
   bdw__render_pipe_profile__sf_stall__read

/* Render Metrics set for 3D Pipeline Profile :: GS Bottleneck */
#define tglgt2__render_pipe_profile__gs_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Bottleneck */
#define tglgt2__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Clipper Bottleneck */
#define tglgt2__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define tglgt2__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics set for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define tglgt2__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: BC Bottleneck */
#define tglgt2__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics set for 3D Pipeline Profile :: HS Stall */
#define tglgt2__render_pipe_profile__hs_stall__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: DS Stall */
#define tglgt2__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: SO Stall */
#define tglgt2__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics set for 3D Pipeline Profile :: CL Stall */
#define tglgt2__render_pipe_profile__cl_stall__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics set for 3D Pipeline Profile :: SF Stall */
#define tglgt2__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Reader aliases for the tglgt2 HDCAndSF metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run (hsw__render_basic__gpu_time__read, for instance, is a
 * static function near the top of the file); no new function is emitted for
 * these metrics.
 *
 * NOTE(review): several targets carry an unrelated metric name (for example
 * gs_threads -> hsw__render_basic__vs_threads__read, or poly_data_ready ->
 * bdw__render_pipe_profile__sf_bottleneck__read). Presumably the generator
 * reuses an earlier reader with an identical equation, so the target's name
 * should not be read as this metric's meaning -- confirm against gen_perf.py.
 */

/* Metric set HDCAndSF :: GPU Time Elapsed */
#define tglgt2__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF :: GPU Core Clocks */
#define tglgt2__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency */
#define tglgt2__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF :: GPU Busy */
#define tglgt2__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF :: VS Threads Dispatched */
#define tglgt2__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: HS Threads Dispatched */
#define tglgt2__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF :: DS Threads Dispatched */
#define tglgt2__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF :: GS Threads Dispatched */
#define tglgt2__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF :: FS Threads Dispatched */
#define tglgt2__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF :: CS Threads Dispatched */
#define tglgt2__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF :: EU Active */
#define tglgt2__hdc_and_sf__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set HDCAndSF :: EU Stall */
#define tglgt2__hdc_and_sf__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set HDCAndSF :: EU Thread Occupancy */
#define tglgt2__hdc_and_sf__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set HDCAndSF :: Rasterized Pixels */
#define tglgt2__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF :: Early Hi-Depth Test Fails */
#define tglgt2__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF :: Early Depth Test Fails */
#define tglgt2__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Samples Killed in FS */
#define tglgt2__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF :: Pixels Failing Tests */
#define tglgt2__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF :: Samples Written */
#define tglgt2__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF :: Samples Blended */
#define tglgt2__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF :: Sampler Texels */
#define tglgt2__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF :: Sampler Texels Misses */
#define tglgt2__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF :: SLM Bytes Read */
#define tglgt2__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF :: SLM Bytes Written */
#define tglgt2__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF :: Shader Memory Accesses */
#define tglgt2__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF :: Shader Atomic Memory Accesses */
#define tglgt2__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF :: L3 Shader Throughput */
#define tglgt2__hdc_and_sf__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF :: Shader Barrier Messages */
#define tglgt2__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set HDCAndSF :: Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3 */
#define tglgt2__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF :: Polygon Data Ready */
#define tglgt2__hdc_and_sf__poly_data_ready__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Reader aliases for the tglgt2 RasterizerAndPixelBackend metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run (hsw__render_basic__gpu_time__read, for instance, is a
 * static function near the top of the file); no new function is emitted for
 * these metrics.
 *
 * NOTE(review): several targets carry an unrelated metric name (for example
 * rasterizer0_input_available -> bdw__render_basic__sampler0_busy__read, or
 * gt_request_queue00_full -> bdw__render_pipe_profile__cl_stall__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define tglgt2__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define tglgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define tglgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define tglgt2__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define tglgt2__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define tglgt2__rasterizer_and_pixel_backend__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define tglgt2__rasterizer_and_pixel_backend__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Thread Occupancy */
#define tglgt2__rasterizer_and_pixel_backend__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define tglgt2__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define tglgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define tglgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define tglgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define tglgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define tglgt2__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define tglgt2__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define tglgt2__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define tglgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define tglgt2__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define tglgt2__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define tglgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define tglgt2__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define tglgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define tglgt2__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define tglgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define tglgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define tglgt2__rasterizer_and_pixel_backend__pixel_data00_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define tglgt2__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 PS Output Available */
#define tglgt2__rasterizer_and_pixel_backend__ps_output01_available__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe2 PS Output Available */
#define tglgt2__rasterizer_and_pixel_backend__ps_output02_available__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define tglgt2__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Pixel Values Ready */
#define tglgt2__rasterizer_and_pixel_backend__pixel_values01_ready__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe2 Pixel Values Ready */
#define tglgt2__rasterizer_and_pixel_backend__pixel_values02_ready__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ00 is full */
#define tglgt2__rasterizer_and_pixel_backend__gt_request_queue00_full__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ01 is full */
#define tglgt2__rasterizer_and_pixel_backend__gt_request_queue01_full__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ10 is full */
#define tglgt2__rasterizer_and_pixel_backend__gt_request_queue10_full__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ11 is full */
#define tglgt2__rasterizer_and_pixel_backend__gt_request_queue11_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Reader aliases for the tglgt2 L3_1 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run (hsw__render_basic__gpu_time__read, for instance, is a
 * static function near the top of the file); no new function is emitted for
 * these metrics.
 *
 * NOTE(review): gs_threads expands to hsw__render_basic__vs_threads__read.
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* L3_1 :: GPU Time Elapsed */
#define tglgt2__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_1 :: GPU Core Clocks */
#define tglgt2__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_1 :: AVG GPU Core Frequency */
#define tglgt2__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_1 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_1 :: GPU Busy */
#define tglgt2__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_1 :: EU Active */
#define tglgt2__l3_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_1 :: EU Stall */
#define tglgt2__l3_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_1 :: EU Thread Occupancy */
#define tglgt2__l3_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_1 :: VS Threads Dispatched */
#define tglgt2__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_1 :: HS Threads Dispatched */
#define tglgt2__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_1 :: DS Threads Dispatched */
#define tglgt2__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_1 :: GS Threads Dispatched */
#define tglgt2__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_1 :: FS Threads Dispatched */
#define tglgt2__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_1 :: CS Threads Dispatched */
#define tglgt2__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_1 :: Slice0 L3 Bank0 Input Available
 *
 * Averages C counters 7, 6, 5 and 4 of the query, multiplies the mean by 100
 * (truncating to an unsigned integer) and divides by the L3_1 GPU core clock
 * reading. Returns 0 when that clock reading is 0.
 *
 * RPN equation: C 7 READ C 6 READ FADD C 5 READ FADD C 4 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV
 */
static float
tglgt2__l3_1__l30_bank0_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t c7 = results->accumulator[query->c_offset + 7];
   const uint64_t c6 = results->accumulator[query->c_offset + 6];
   const uint64_t c5 = results->accumulator[query->c_offset + 5];
   const uint64_t c4 = results->accumulator[query->c_offset + 4];

   /* The first addition happens in 64-bit integer arithmetic and is only
    * then widened to double; the remaining two are floating-point adds.
    */
   double total = c7 + c6;
   total += c5;
   total += c4;

   const double mean = total / 4.0;

   /* UMUL step: the product is truncated back to an unsigned integer. */
   const uint64_t scaled = mean * 100;

   const double core_clocks = tglgt2__l3_1__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0)
      return 0;

   const double ratio = (double)scaled / core_clocks;
   return ratio;
}

/* L3_1 :: Slice0 L3 Bank1 Input Available
 *
 * Averages C counters 3, 2, 1 and 0 of the query, multiplies the mean by 100
 * (truncating to an unsigned integer) and divides by the L3_1 GPU core clock
 * reading. Returns 0 when that clock reading is 0.
 *
 * RPN equation: C 3 READ C 2 READ FADD C 1 READ FADD C 0 READ FADD 4 FDIV 100 UMUL $GpuCoreClocks FDIV
 */
static float
tglgt2__l3_1__l30_bank1_input_available__read(UNUSED struct intel_perf_config *perf,
                                              const struct intel_perf_query_info *query,
                                              const struct intel_perf_query_result *results)
{
   const uint64_t c3 = results->accumulator[query->c_offset + 3];
   const uint64_t c2 = results->accumulator[query->c_offset + 2];
   const uint64_t c1 = results->accumulator[query->c_offset + 1];
   const uint64_t c0 = results->accumulator[query->c_offset + 0];

   /* The first addition happens in 64-bit integer arithmetic and is only
    * then widened to double; the remaining two are floating-point adds.
    */
   double total = c3 + c2;
   total += c1;
   total += c0;

   const double mean = total / 4.0;

   /* UMUL step: the product is truncated back to an unsigned integer. */
   const uint64_t scaled = mean * 100;

   const double core_clocks = tglgt2__l3_1__gpu_core_clocks__read(perf, query, results);
   if (core_clocks == 0)
      return 0;

   const double ratio = (double)scaled / core_clocks;
   return ratio;
}

/* The two remaining tglgt2 L3_1 bank readers are macro aliases onto tglgt1
 * L3_2 readers that are not defined in this part of the file.
 *
 * NOTE(review): the bank numbers differ between alias and target (bank4 ->
 * bank2, bank5 -> bank3). Presumably the generator reuses an earlier reader
 * with an identical equation -- confirm against gen_perf.py.
 */

/* L3_1 :: Slice0 L3 Bank4 Input Available */
#define tglgt2__l3_1__l30_bank4_input_available__read \
   tglgt1__l3_2__l30_bank2_input_available__read

/* L3_1 :: Slice0 L3 Bank5 Input Available */
#define tglgt2__l3_1__l30_bank5_input_available__read \
   tglgt1__l3_2__l30_bank3_input_available__read

/* Reader aliases for the tglgt2 L3_2 metric set.
 *
 * Every entry below is a macro that expands to an existing reader
 * identifier; no new function is emitted for these metrics. The bank6 and
 * bank7 entries expand to the tglgt2 L3_1 bank1 and bank0 reader functions
 * respectively.
 *
 * NOTE(review): several targets carry a different metric or bank name (for
 * example gs_threads -> hsw__render_basic__vs_threads__read). Presumably
 * the generator reuses an earlier reader with an identical equation, so the
 * target's name should not be read as this metric's meaning -- confirm
 * against gen_perf.py.
 */

/* L3_2 :: GPU Time Elapsed */
#define tglgt2__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_2 :: GPU Core Clocks */
#define tglgt2__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_2 :: AVG GPU Core Frequency */
#define tglgt2__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_2 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_2 :: GPU Busy */
#define tglgt2__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_2 :: VS Threads Dispatched */
#define tglgt2__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_2 :: HS Threads Dispatched */
#define tglgt2__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_2 :: DS Threads Dispatched */
#define tglgt2__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_2 :: GS Threads Dispatched */
#define tglgt2__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_2 :: FS Threads Dispatched */
#define tglgt2__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_2 :: CS Threads Dispatched */
#define tglgt2__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_2 :: EU Active */
#define tglgt2__l3_2__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_2 :: EU Stall */
#define tglgt2__l3_2__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_2 :: EU Thread Occupancy */
#define tglgt2__l3_2__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_2 :: Slice0 L3 Bank2 Input Available */
#define tglgt2__l3_2__l30_bank2_input_available__read \
   tglgt1__l3_2__l30_bank2_input_available__read

/* L3_2 :: Slice0 L3 Bank3 Input Available */
#define tglgt2__l3_2__l30_bank3_input_available__read \
   tglgt1__l3_2__l30_bank3_input_available__read

/* L3_2 :: Slice0 L3 Bank6 Input Available */
#define tglgt2__l3_2__l30_bank6_input_available__read \
   tglgt2__l3_1__l30_bank1_input_available__read

/* L3_2 :: Slice0 L3 Bank7 Input Available */
#define tglgt2__l3_2__l30_bank7_input_available__read \
   tglgt2__l3_1__l30_bank0_input_available__read

/* Reader aliases for the tglgt2 L3_3 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run; no new function is emitted for these metrics.
 *
 * NOTE(review): several targets carry a different metric or bank name (for
 * example bank4_output_ready -> tglgt1__l3_4__l30_bank1_output_ready__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* L3_3 :: GPU Time Elapsed */
#define tglgt2__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_3 :: GPU Core Clocks */
#define tglgt2__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_3 :: AVG GPU Core Frequency */
#define tglgt2__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_3 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_3 :: GPU Busy */
#define tglgt2__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_3 :: VS Threads Dispatched */
#define tglgt2__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_3 :: HS Threads Dispatched */
#define tglgt2__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_3 :: DS Threads Dispatched */
#define tglgt2__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_3 :: GS Threads Dispatched */
#define tglgt2__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_3 :: FS Threads Dispatched */
#define tglgt2__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_3 :: CS Threads Dispatched */
#define tglgt2__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_3 :: EU Active */
#define tglgt2__l3_3__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_3 :: EU Stall */
#define tglgt2__l3_3__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_3 :: EU Thread Occupancy */
#define tglgt2__l3_3__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_3 :: Slice0 L3 Bank0 Output Ready */
#define tglgt2__l3_3__l30_bank0_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/* L3_3 :: Slice0 L3 Bank4 Output Ready */
#define tglgt2__l3_3__l30_bank4_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* Reader aliases for the tglgt2 L3_4 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run; no new function is emitted for these metrics.
 *
 * NOTE(review): several targets carry a different metric or bank name (for
 * example bank5_output_ready -> tglgt1__l3_3__l30_bank0_output_ready__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* L3_4 :: GPU Time Elapsed */
#define tglgt2__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_4 :: GPU Core Clocks */
#define tglgt2__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_4 :: AVG GPU Core Frequency */
#define tglgt2__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_4 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_4 :: GPU Busy */
#define tglgt2__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_4 :: VS Threads Dispatched */
#define tglgt2__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_4 :: HS Threads Dispatched */
#define tglgt2__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_4 :: DS Threads Dispatched */
#define tglgt2__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_4 :: GS Threads Dispatched */
#define tglgt2__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_4 :: FS Threads Dispatched */
#define tglgt2__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_4 :: CS Threads Dispatched */
#define tglgt2__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_4 :: EU Active */
#define tglgt2__l3_4__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_4 :: EU Stall */
#define tglgt2__l3_4__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_4 :: EU Thread Occupancy */
#define tglgt2__l3_4__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_4 :: Slice0 L3 Bank1 Output Ready */
#define tglgt2__l3_4__l30_bank1_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_4 :: Slice0 L3 Bank5 Output Ready */
#define tglgt2__l3_4__l30_bank5_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the tglgt2 L3_5 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run; no new function is emitted for these metrics.
 *
 * NOTE(review): several targets carry a different metric or bank name (for
 * example bank2_output_ready -> tglgt1__l3_4__l30_bank1_output_ready__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* L3_5 :: GPU Time Elapsed */
#define tglgt2__l3_5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_5 :: GPU Core Clocks */
#define tglgt2__l3_5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_5 :: AVG GPU Core Frequency */
#define tglgt2__l3_5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_5 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_5 :: GPU Busy */
#define tglgt2__l3_5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_5 :: VS Threads Dispatched */
#define tglgt2__l3_5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_5 :: HS Threads Dispatched */
#define tglgt2__l3_5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_5 :: DS Threads Dispatched */
#define tglgt2__l3_5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_5 :: GS Threads Dispatched */
#define tglgt2__l3_5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_5 :: FS Threads Dispatched */
#define tglgt2__l3_5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_5 :: CS Threads Dispatched */
#define tglgt2__l3_5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_5 :: EU Active */
#define tglgt2__l3_5__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_5 :: EU Stall */
#define tglgt2__l3_5__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_5 :: EU Thread Occupancy */
#define tglgt2__l3_5__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_5 :: Slice0 L3 Bank2 Output Ready */
#define tglgt2__l3_5__l30_bank2_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_5 :: Slice0 L3 Bank6 Output Ready */
#define tglgt2__l3_5__l30_bank6_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the tglgt2 L3_6 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run; no new function is emitted for these metrics.
 *
 * NOTE(review): several targets carry a different metric or bank name (for
 * example bank3_output_ready -> tglgt1__l3_4__l30_bank1_output_ready__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* L3_6 :: GPU Time Elapsed */
#define tglgt2__l3_6__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_6 :: GPU Core Clocks */
#define tglgt2__l3_6__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_6 :: AVG GPU Core Frequency */
#define tglgt2__l3_6__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_6 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__l3_6__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_6 :: GPU Busy */
#define tglgt2__l3_6__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_6 :: VS Threads Dispatched */
#define tglgt2__l3_6__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_6 :: HS Threads Dispatched */
#define tglgt2__l3_6__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_6 :: DS Threads Dispatched */
#define tglgt2__l3_6__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_6 :: GS Threads Dispatched */
#define tglgt2__l3_6__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_6 :: FS Threads Dispatched */
#define tglgt2__l3_6__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_6 :: CS Threads Dispatched */
#define tglgt2__l3_6__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_6 :: EU Active */
#define tglgt2__l3_6__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_6 :: EU Stall */
#define tglgt2__l3_6__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_6 :: EU Thread Occupancy */
#define tglgt2__l3_6__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_6 :: Slice0 L3 Bank3 Output Ready */
#define tglgt2__l3_6__l30_bank3_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_6 :: Slice0 L3 Bank7 Output Ready */
#define tglgt2__l3_6__l30_bank7_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the tglgt2 Sampler_1 metric set.
 *
 * Every entry below is a macro that expands to a reader identifier defined
 * outside this run; no new function is emitted for these metrics.
 *
 * NOTE(review): several targets carry an unrelated metric name (for example
 * sampler00_input_available -> bdw__render_pipe_profile__cl_bottleneck__read).
 * Presumably the generator reuses an earlier reader with an identical
 * equation, so the target's name should not be read as this metric's
 * meaning -- confirm against gen_perf.py.
 */

/* Sampler_1 :: GPU Time Elapsed */
#define tglgt2__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Sampler_1 :: GPU Core Clocks */
#define tglgt2__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Sampler_1 :: AVG GPU Core Frequency */
#define tglgt2__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_1 :: AVG GPU Core Frequency (`__max` companion of the reader above) */
#define tglgt2__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_1 :: GPU Busy */
#define tglgt2__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Sampler_1 :: VS Threads Dispatched */
#define tglgt2__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Sampler_1 :: HS Threads Dispatched */
#define tglgt2__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Sampler_1 :: DS Threads Dispatched */
#define tglgt2__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Sampler_1 :: GS Threads Dispatched */
#define tglgt2__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Sampler_1 :: FS Threads Dispatched */
#define tglgt2__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Sampler_1 :: CS Threads Dispatched */
#define tglgt2__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Sampler_1 :: EU Active */
#define tglgt2__sampler_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Sampler_1 :: EU Stall */
#define tglgt2__sampler_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Sampler_1 :: EU Thread Occupancy */
#define tglgt2__sampler_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler_1 :: Slice0 DualSubslice0 Input Available */
#define tglgt2__sampler_1__sampler00_input_available__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice1 Input Available */
#define tglgt2__sampler_1__sampler01_input_available__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice2 Input Available */
#define tglgt2__sampler_1__sampler02_input_available__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice3 Input Available */
#define tglgt2__sampler_1__sampler03_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice4 Input Available */
#define tglgt2__sampler_1__sampler04_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Sampler_1 :: Slice0 DualSubslice5 Input Available */
#define tglgt2__sampler_1__sampler05_input_available__read \
   bdw__render_basic__sampler0_busy__read

/*
 * tglgt2 / Sampler_2: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 Sampler_2 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__sampler_2__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__sampler_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__sampler_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__sampler_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__sampler_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__sampler_2__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__sampler_2__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__sampler_2__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__sampler_2__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__sampler_2__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__sampler_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define tglgt2__sampler_2__eu_active__read tglgt1__render_basic__eu_active__read
/* EU Stall */
#define tglgt2__sampler_2__eu_stall__read tglgt1__render_basic__eu_stall__read
/* EU Thread Occupancy */
#define tglgt2__sampler_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 DualSubslice0..5 Sampler Output Ready */
#define tglgt2__sampler_2__sampler00_output_ready__read bdw__render_pipe_profile__cl_bottleneck__read
#define tglgt2__sampler_2__sampler01_output_ready__read bdw__render_pipe_profile__so_bottleneck__read
#define tglgt2__sampler_2__sampler02_output_ready__read bdw__render_basic__sampler1_bottleneck__read
#define tglgt2__sampler_2__sampler03_output_ready__read bdw__render_basic__sampler0_bottleneck__read
#define tglgt2__sampler_2__sampler04_output_ready__read bdw__render_basic__sampler1_busy__read
#define tglgt2__sampler_2__sampler05_output_ready__read bdw__render_basic__sampler0_busy__read

/*
 * tglgt2 / TDL_1: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 TDL_1 accessor name expand to another
 * accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define tglgt2__tdl_1__eu_active__read tglgt1__render_basic__eu_active__read
/* EU Stall */
#define tglgt2__tdl_1__eu_stall__read tglgt1__render_basic__eu_stall__read
/* EU Thread Occupancy */
#define tglgt2__tdl_1__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0..5 Thread Dispatcher */
#define tglgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read
#define tglgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read
#define tglgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read
#define tglgt2__tdl_1__non_ps_thread03_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read
#define tglgt2__tdl_1__non_ps_thread04_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read
#define tglgt2__tdl_1__non_ps_thread05_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_1__thread_header00_ready_port0__read bdw__render_pipe_profile__vf_bottleneck__read
#define tglgt2__tdl_1__thread_header00_ready_port1__read bdw__render_pipe_profile__hs_stall__read
#define tglgt2__tdl_1__thread_header00_ready_port2__read bdw__render_pipe_profile__ds_stall__read
#define tglgt2__tdl_1__thread_header00_ready_port3__read bdw__render_pipe_profile__so_stall__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_1__thread_header01_ready_port0__read bdw__render_pipe_profile__cl_stall__read
#define tglgt2__tdl_1__thread_header01_ready_port1__read bdw__render_pipe_profile__sf_stall__read
#define tglgt2__tdl_1__thread_header01_ready_port2__read bdw__render_pipe_profile__hi_depth_bottleneck__read
#define tglgt2__tdl_1__thread_header01_ready_port3__read bdw__render_pipe_profile__bc_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice0 / DualSubslice1 Thread Dispatcher */
#define tglgt2__tdl_1__thread_header00_ready__read bdw__render_pipe_profile__sf_bottleneck__read
#define tglgt2__tdl_1__thread_header01_ready__read bdw__render_pipe_profile__early_depth_bottleneck__read

/*
 * tglgt2 / TDL_2: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 TDL_2 accessor name expand to another
 * accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define tglgt2__tdl_2__eu_active__read tglgt1__render_basic__eu_active__read
/* EU Stall */
#define tglgt2__tdl_2__eu_stall__read tglgt1__render_basic__eu_stall__read
/* EU Thread Occupancy */
#define tglgt2__tdl_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* PS Thread Ready For Dispatch on Slice0 DualSubslice0..5 Thread Dispatcher */
#define tglgt2__tdl_2__ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read
#define tglgt2__tdl_2__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read
#define tglgt2__tdl_2__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read
#define tglgt2__tdl_2__ps_thread03_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read
#define tglgt2__tdl_2__ps_thread04_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read
#define tglgt2__tdl_2__ps_thread05_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher */
#define tglgt2__tdl_2__thread_header05_ready__read bdw__render_pipe_profile__sf_bottleneck__read
/* Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_2__thread_header05_ready_port0__read bdw__render_pipe_profile__cl_stall__read
#define tglgt2__tdl_2__thread_header05_ready_port1__read bdw__render_pipe_profile__sf_stall__read
#define tglgt2__tdl_2__thread_header05_ready_port2__read bdw__render_pipe_profile__hi_depth_bottleneck__read
#define tglgt2__tdl_2__thread_header05_ready_port3__read bdw__render_pipe_profile__bc_bottleneck__read

/*
 * tglgt2 / TDL_3: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 TDL_3 accessor name expand to another
 * accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__tdl_3__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__tdl_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__tdl_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__tdl_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__tdl_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__tdl_3__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__tdl_3__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__tdl_3__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__tdl_3__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__tdl_3__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__tdl_3__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define tglgt2__tdl_3__eu_active__read tglgt1__render_basic__eu_active__read
/* EU Stall */
#define tglgt2__tdl_3__eu_stall__read tglgt1__render_basic__eu_stall__read
/* EU Thread Occupancy */
#define tglgt2__tdl_3__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Thread Header Ready on Slice0 DualSubslice2 / 3 / 4 Thread Dispatcher */
#define tglgt2__tdl_3__thread_header02_ready__read bdw__render_basic__sampler0_busy__read
#define tglgt2__tdl_3__thread_header03_ready__read bdw__render_basic__sampler1_busy__read
#define tglgt2__tdl_3__thread_header04_ready__read bdw__render_basic__sampler0_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_3__thread_header02_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read
#define tglgt2__tdl_3__thread_header02_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read
#define tglgt2__tdl_3__thread_header02_ready_port2__read bdw__render_pipe_profile__sf_stall__read
#define tglgt2__tdl_3__thread_header02_ready_port3__read bdw__render_pipe_profile__cl_stall__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_3__thread_header03_ready_port0__read bdw__render_pipe_profile__so_stall__read
#define tglgt2__tdl_3__thread_header03_ready_port1__read bdw__render_pipe_profile__ds_stall__read
#define tglgt2__tdl_3__thread_header03_ready_port2__read bdw__render_pipe_profile__hs_stall__read
#define tglgt2__tdl_3__thread_header03_ready_port3__read bdw__render_pipe_profile__vf_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0..3 */
#define tglgt2__tdl_3__thread_header04_ready_port0__read bdw__render_pipe_profile__early_depth_bottleneck__read
#define tglgt2__tdl_3__thread_header04_ready_port1__read bdw__render_pipe_profile__sf_bottleneck__read
#define tglgt2__tdl_3__thread_header04_ready_port2__read bdw__render_pipe_profile__cl_bottleneck__read
#define tglgt2__tdl_3__thread_header04_ready_port3__read bdw__render_pipe_profile__so_bottleneck__read

/*
 * tglgt2 / GpuBusyness: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 GpuBusyness accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__gpu_busyness__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define tglgt2__gpu_busyness__eu_active__read tglgt1__render_basic__eu_active__read
/* EU Stall */
#define tglgt2__gpu_busyness__eu_stall__read tglgt1__render_basic__eu_stall__read
/* EU Thread Occupancy */
#define tglgt2__gpu_busyness__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__gpu_busyness__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__gpu_busyness__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__gpu_busyness__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__gpu_busyness__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__gpu_busyness__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__gpu_busyness__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__gpu_busyness__render_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read
/* Compute Ring Busy */
#define tglgt2__gpu_busyness__compute_busy__read bdw__render_pipe_profile__so_stall__read
/* Posh Ring Busy */
#define tglgt2__gpu_busyness__posh_engine_busy__read bdw__render_pipe_profile__sf_stall__read
/* Blitter Ring Busy */
#define tglgt2__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read
/* Vebox Ring Busy */
#define tglgt2__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__bc_bottleneck__read
/* Vdbox0 Ring Busy */
#define tglgt2__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hs_stall__read
/* Vdbox1 Ring Busy */
#define tglgt2__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__ds_stall__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__gpu_busyness__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read
/* Any Engine Busy */
#define tglgt2__gpu_busyness__any_engine_busy__read bdw__render_basic__sampler1_busy__read

/*
 * tglgt2 / EuActivity1: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 EuActivity1 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity1__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__eu_activity1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__eu_activity1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__eu_activity1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__eu_activity1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__eu_activity1__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__eu_activity1__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__eu_activity1__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__eu_activity1__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__eu_activity1__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__eu_activity1__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity1__render_busy__read bdw__render_pipe_profile__hs_stall__read
/* Compute Ring Busy */
#define tglgt2__eu_activity1__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity1__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity1__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define tglgt2__eu_activity1__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* VS FPU Pipe Active */
#define tglgt2__eu_activity1__vs_fpu_active__read tglgt1__render_basic__eu_active__read
/* PS FPU Pipe Active */
#define tglgt2__eu_activity1__ps_fpu_active__read tglgt1__render_basic__eu_stall__read
/* EU Send Pipe Active */
#define tglgt2__eu_activity1__eu_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt2 / EuActivity2: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 EuActivity2 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity2__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__eu_activity2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__eu_activity2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__eu_activity2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__eu_activity2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__eu_activity2__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__eu_activity2__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__eu_activity2__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__eu_activity2__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__eu_activity2__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__eu_activity2__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity2__render_busy__read bdw__render_pipe_profile__hs_stall__read
/* Compute Ring Busy */
#define tglgt2__eu_activity2__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity2__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity2__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define tglgt2__eu_activity2__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* CS EM Pipe Active */
#define tglgt2__eu_activity2__cs_em_active__read tglgt1__eu_activity1__eu_send_active__read
/* CS FPU Pipe Active */
#define tglgt2__eu_activity2__cs_fpu_active__read tglgt1__render_basic__eu_stall__read
/* CS Send Pipeline Active */
#define tglgt2__eu_activity2__cs_send_active__read tglgt1__render_basic__eu_active__read

/*
 * tglgt2 / EuActivity3: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 EuActivity3 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity3__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__eu_activity3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__eu_activity3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__eu_activity3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__eu_activity3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__eu_activity3__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__eu_activity3__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__eu_activity3__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__eu_activity3__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__eu_activity3__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__eu_activity3__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity3__render_busy__read bdw__render_pipe_profile__hs_stall__read
/* Compute Ring Busy */
#define tglgt2__eu_activity3__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity3__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity3__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define tglgt2__eu_activity3__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* VS EM Pipe Active */
#define tglgt2__eu_activity3__vs_em_active__read tglgt1__render_basic__eu_active__read
/* PS EM Pipe Active */
#define tglgt2__eu_activity3__ps_em_active__read tglgt1__render_basic__eu_stall__read
/* PS Send Pipeline Active */
#define tglgt2__eu_activity3__ps_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt2 / EuActivity4: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 EuActivity4 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity4__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__eu_activity4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__eu_activity4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__eu_activity4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__eu_activity4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__eu_activity4__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__eu_activity4__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__eu_activity4__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__eu_activity4__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__eu_activity4__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__eu_activity4__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity4__render_busy__read bdw__render_pipe_profile__hs_stall__read
/* Compute Ring Busy */
#define tglgt2__eu_activity4__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity4__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity4__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define tglgt2__eu_activity4__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* HS FPU Pipe Active */
#define tglgt2__eu_activity4__hs_fpu_active__read tglgt1__render_basic__eu_active__read
/* DS FPU Pipe Active */
#define tglgt2__eu_activity4__ds_fpu_active__read tglgt1__render_basic__eu_stall__read
/* VS Send Pipe Active */
#define tglgt2__eu_activity4__vs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt2 / EuActivity5: counter accessor aliases.
 *
 * Each object-like macro makes a tglgt2 EuActivity5 accessor name expand to
 * another accessor identifier; nothing but the aliases is defined here.
 * NOTE(review): the targets look like readers emitted for other platforms or
 * metric sets and reused by the generator, so a target's name need not
 * describe the counter it is aliased for -- confirm against gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity5__gpu_time__read hsw__render_basic__gpu_time__read
/* GPU Core Clocks */
#define tglgt2__eu_activity5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read
/* AVG GPU Core Frequency (`__read` and `__max` accessors) */
#define tglgt2__eu_activity5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read
#define tglgt2__eu_activity5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max
/* GPU Busy */
#define tglgt2__eu_activity5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Threads dispatched per shader stage: VS, HS, DS, GS, FS, CS */
#define tglgt2__eu_activity5__vs_threads__read bdw__render_basic__vs_threads__read
#define tglgt2__eu_activity5__hs_threads__read bdw__render_basic__hs_threads__read
#define tglgt2__eu_activity5__ds_threads__read bdw__render_basic__ds_threads__read
#define tglgt2__eu_activity5__gs_threads__read hsw__render_basic__vs_threads__read
#define tglgt2__eu_activity5__ps_threads__read bdw__render_basic__ps_threads__read
#define tglgt2__eu_activity5__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity5__render_busy__read bdw__render_pipe_profile__hs_stall__read
/* Compute Ring Busy */
#define tglgt2__eu_activity5__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read
/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity5__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity5__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read
/* GTI Write Throughput */
#define tglgt2__eu_activity5__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* HS EM Pipe Active */
#define tglgt2__eu_activity5__hs_em_active__read tglgt1__render_basic__eu_active__read
/* DS EM Pipe Active */
#define tglgt2__eu_activity5__ds_em_active__read tglgt1__render_basic__eu_stall__read
/* HS Send Pipe Active */
#define tglgt2__eu_activity5__hs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt2 :: EuActivity6 counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/tglgt1) and adds no code of
 * its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. gs_fpu_active -> tglgt1 eu_active); presumably the
 * generator reuses accessors with identical equations -- confirm in
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity6__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define tglgt2__eu_activity6__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define tglgt2__eu_activity6__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define tglgt2__eu_activity6__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define tglgt2__eu_activity6__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define tglgt2__eu_activity6__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define tglgt2__eu_activity6__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define tglgt2__eu_activity6__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define tglgt2__eu_activity6__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define tglgt2__eu_activity6__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define tglgt2__eu_activity6__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity6__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define tglgt2__eu_activity6__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity6__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity6__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define tglgt2__eu_activity6__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* GS FPU Pipe Active */
#define tglgt2__eu_activity6__gs_fpu_active__read tglgt1__render_basic__eu_active__read

/* GS EM Pipe Active */
#define tglgt2__eu_activity6__gs_em_active__read tglgt1__render_basic__eu_stall__read

/* GS Send Pipe Active */
#define tglgt2__eu_activity6__gs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * tglgt2 :: EuActivity7 counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/tglgt1) and adds no code of
 * its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. fpu_active -> tglgt1 eu_stall); presumably the generator
 * reuses accessors with identical equations -- confirm in gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity7__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define tglgt2__eu_activity7__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define tglgt2__eu_activity7__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define tglgt2__eu_activity7__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define tglgt2__eu_activity7__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define tglgt2__eu_activity7__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define tglgt2__eu_activity7__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define tglgt2__eu_activity7__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define tglgt2__eu_activity7__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define tglgt2__eu_activity7__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define tglgt2__eu_activity7__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU Pipe Active */
#define tglgt2__eu_activity7__fpu_active__read tglgt1__render_basic__eu_stall__read

/* EM Pipe Active */
#define tglgt2__eu_activity7__em_active__read tglgt1__eu_activity1__eu_send_active__read

/* EU FPU And EM Pipes Active */
#define tglgt2__eu_activity7__eu_fpu_em_active__read tglgt1__render_basic__eu_active__read

/* EU AVG IPC Rate */
#define tglgt2__eu_activity7__eu_avg_ipc_rate__read tglgt1__eu_activity7__eu_avg_ipc_rate__read

/* Render Ring Busy */
#define tglgt2__eu_activity7__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define tglgt2__eu_activity7__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity7__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity7__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define tglgt2__eu_activity7__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/*
 * tglgt2 :: EuActivity8 counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/tglgt1) and adds no code of
 * its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. ds_send_active -> tglgt1 eu_active); presumably the
 * generator reuses accessors with identical equations -- confirm in
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__eu_activity8__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define tglgt2__eu_activity8__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define tglgt2__eu_activity8__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define tglgt2__eu_activity8__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define tglgt2__eu_activity8__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define tglgt2__eu_activity8__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define tglgt2__eu_activity8__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define tglgt2__eu_activity8__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define tglgt2__eu_activity8__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define tglgt2__eu_activity8__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define tglgt2__eu_activity8__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define tglgt2__eu_activity8__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define tglgt2__eu_activity8__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define tglgt2__eu_activity8__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define tglgt2__eu_activity8__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define tglgt2__eu_activity8__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* DS Send Pipe Active */
#define tglgt2__eu_activity8__ds_send_active__read tglgt1__render_basic__eu_active__read

/*
 * tglgt2 :: Metric set TestOa counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying the hsw or bdw prefix and adds no code of its own.
 * NOTE(review): the TestCounterN aliases point at hsw compute_extended
 * accessors with unrelated names; presumably the generator reuses accessors
 * with identical equations -- confirm in gen_perf.py.
 */

/* GPU Time Elapsed */
#define tglgt2__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define tglgt2__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define tglgt2__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define tglgt2__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TestCounter0 */
#define tglgt2__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* TestCounter1 */
#define tglgt2__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* TestCounter2 */
#define tglgt2__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* TestCounter3 */
#define tglgt2__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* TestCounter4 */
#define tglgt2__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* TestCounter5 */
#define tglgt2__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* TestCounter6 */
#define tglgt2__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* TestCounter7 */
#define tglgt2__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* TestCounter8 */
#define tglgt2__test_oa__counter8__read hsw__compute_extended__typed_writes0__read

/* TestCounter9 - OAR enable */
#define tglgt2__test_oa__counter9__read hsw__compute_extended__untyped_writes0__read

/*
 * rkl :: Render Metrics Basic set counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/icl/tglgt1) and adds no code
 * of its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. shader_barriers -> hsw early_depth_test_fails);
 * presumably the generator reuses accessors with identical equations --
 * confirm in gen_perf.py.
 */

/* GPU Time Elapsed */
#define rkl__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define rkl__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define rkl__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define rkl__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched */
#define rkl__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define rkl__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define rkl__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define rkl__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define rkl__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define rkl__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy */
#define rkl__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define rkl__render_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define rkl__render_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define rkl__render_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler00 Busy */
#define rkl__render_basic__sampler00_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler Slice0 Dualsubslice0 is bottleneck */
#define rkl__render_basic__sampler00_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy */
#define rkl__render_basic__samplers_busy__read bdw__render_basic__sampler0_busy__read

/* Samplers Bottleneck */
#define rkl__render_basic__sampler_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Rasterized Pixels */
#define rkl__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define rkl__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define rkl__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define rkl__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define rkl__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define rkl__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define rkl__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define rkl__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define rkl__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define rkl__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define rkl__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define rkl__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define rkl__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define rkl__render_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define rkl__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Read Throughput */
#define rkl__render_basic__gti_read_throughput__read tglgt1__render_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define rkl__render_basic__gti_write_throughput__read tglgt1__render_basic__gti_write_throughput__read

/*
 * rkl :: Compute Metrics Basic set counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/icl/tglgt1) and adds no code
 * of its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. shader_barriers -> hsw early_depth_test_fails);
 * presumably the generator reuses accessors with identical equations --
 * confirm in gen_perf.py.
 */

/* GPU Time Elapsed */
#define rkl__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define rkl__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define rkl__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define rkl__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define rkl__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define rkl__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define rkl__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define rkl__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define rkl__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define rkl__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define rkl__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define rkl__compute_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define rkl__compute_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define rkl__compute_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define rkl__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define rkl__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define rkl__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define rkl__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define rkl__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define rkl__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define rkl__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define rkl__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define rkl__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define rkl__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define rkl__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define rkl__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define rkl__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define rkl__compute_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define rkl__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Read Throughput */
#define rkl__compute_basic__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define rkl__compute_basic__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/*
 * rkl :: Render Metrics set for 3D Pipeline Profile counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/icl/tglgt1) and adds no code
 * of its own.
 * NOTE(review): the bottleneck/stall aliases point at bdw accessors named
 * after other pipeline stages (e.g. vf_bottleneck -> bdw bc_bottleneck);
 * presumably the generator reuses accessors with identical equations --
 * confirm in gen_perf.py.
 */

/* GPU Time Elapsed */
#define rkl__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define rkl__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define rkl__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define rkl__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define rkl__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define rkl__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define rkl__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define rkl__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define rkl__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define rkl__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define rkl__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define rkl__render_pipe_profile__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define rkl__render_pipe_profile__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define rkl__render_pipe_profile__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define rkl__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define rkl__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define rkl__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define rkl__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define rkl__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define rkl__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define rkl__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define rkl__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define rkl__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define rkl__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define rkl__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define rkl__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define rkl__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define rkl__render_pipe_profile__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define rkl__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck */
#define rkl__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* VS Bottleneck */
#define rkl__render_pipe_profile__vs_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* HS Bottleneck */
#define rkl__render_pipe_profile__hs_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* DS Bottleneck */
#define rkl__render_pipe_profile__ds_bottleneck__read bdw__render_pipe_profile__sf_stall__read

/* GS Bottleneck */
#define rkl__render_pipe_profile__gs_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* SO Bottleneck */
#define rkl__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__cl_stall__read

/* Clipper Bottleneck */
#define rkl__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__so_stall__read

/* Strip-Fans Bottleneck */
#define rkl__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__ds_stall__read

/* Hi-Depth Bottleneck */
#define rkl__render_pipe_profile__hi_depth_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* BC Bottleneck */
#define rkl__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__hs_stall__read

/* HS Stall */
#define rkl__render_pipe_profile__hs_stall__read bdw__render_basic__sampler0_bottleneck__read

/* DS Stall */
#define rkl__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__vf_bottleneck__read

/* SO Stall */
#define rkl__render_pipe_profile__so_stall__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* CL Stall */
#define rkl__render_pipe_profile__cl_stall__read bdw__render_basic__sampler1_busy__read

/* SF Stall */
#define rkl__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_bottleneck__read

/*
 * rkl :: Metric set HDCAndSF counter accessors.
 *
 * Every macro in this group is a pure name alias: it expands to an accessor
 * identifier carrying another prefix (hsw/bdw/icl/tglgt1) and adds no code
 * of its own.
 * NOTE(review): several targets are named after a different counter than
 * the alias (e.g. poly_data_ready -> bdw sampler0_bottleneck); presumably
 * the generator reuses accessors with identical equations -- confirm in
 * gen_perf.py.
 */

/* GPU Time Elapsed */
#define rkl__hdc_and_sf__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define rkl__hdc_and_sf__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read accessor) */
#define rkl__hdc_and_sf__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max accessor) */
#define rkl__hdc_and_sf__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define rkl__hdc_and_sf__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define rkl__hdc_and_sf__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define rkl__hdc_and_sf__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define rkl__hdc_and_sf__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define rkl__hdc_and_sf__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define rkl__hdc_and_sf__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define rkl__hdc_and_sf__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define rkl__hdc_and_sf__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define rkl__hdc_and_sf__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define rkl__hdc_and_sf__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define rkl__hdc_and_sf__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define rkl__hdc_and_sf__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define rkl__hdc_and_sf__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define rkl__hdc_and_sf__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define rkl__hdc_and_sf__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define rkl__hdc_and_sf__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define rkl__hdc_and_sf__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define rkl__hdc_and_sf__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define rkl__hdc_and_sf__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define rkl__hdc_and_sf__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define rkl__hdc_and_sf__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define rkl__hdc_and_sf__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define rkl__hdc_and_sf__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define rkl__hdc_and_sf__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define rkl__hdc_and_sf__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3 */
#define rkl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read bdw__render_basic__sampler0_busy__read

/* Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3 */
#define rkl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read bdw__render_basic__sampler1_busy__read

/* Polygon Data Ready */
#define rkl__hdc_and_sf__poly_data_ready__read bdw__render_basic__sampler0_bottleneck__read

/*
 * rkl__rasterizer_and_pixel_backend__* reader aliases.
 *
 * Each macro below expands an rkl__rasterizer_and_pixel_backend__* name to a
 * reader that is named under another prefix (hsw__, bdw__, tglgt1__, icl__)
 * instead of defining a new function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. gs_threads -> hsw__render_basic__vs_threads__read,
 * rasterizer0_output_ready -> bdw__render_pipe_profile__sf_stall__read);
 * presumably the generator shares readers whose equations are identical --
 * confirm against gen_perf.py before "fixing" a target by hand.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define rkl__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define rkl__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define rkl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max alias) */
#define rkl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define rkl__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define rkl__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define rkl__rasterizer_and_pixel_backend__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define rkl__rasterizer_and_pixel_backend__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Thread Occupancy */
#define rkl__rasterizer_and_pixel_backend__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define rkl__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define rkl__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define rkl__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define rkl__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define rkl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define rkl__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define rkl__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define rkl__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define rkl__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define rkl__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define rkl__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define rkl__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define rkl__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define rkl__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define rkl__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define rkl__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define rkl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define rkl__rasterizer_and_pixel_backend__pixel_data00_ready__read \
   bdw__render_basic__sampler1_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define rkl__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define rkl__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ00 is full */
#define rkl__rasterizer_and_pixel_backend__gt_request_queue00_full__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ01 is full */
#define rkl__rasterizer_and_pixel_backend__gt_request_queue01_full__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ10 is full */
#define rkl__rasterizer_and_pixel_backend__gt_request_queue10_full__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ11 is full */
#define rkl__rasterizer_and_pixel_backend__gt_request_queue11_full__read \
   bdw__render_pipe_profile__hs_stall__read

/*
 * rkl__l3_1__* reader aliases.
 *
 * Each macro below expands an rkl__l3_1__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. gs_threads -> hsw__render_basic__vs_threads__read); presumably
 * the generator shares readers whose equations are identical -- confirm
 * against gen_perf.py before "fixing" a target by hand.
 */

/* L3_1 :: GPU Time Elapsed */
#define rkl__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_1 :: GPU Core Clocks */
#define rkl__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_1 :: AVG GPU Core Frequency */
#define rkl__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_1 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_1 :: GPU Busy */
#define rkl__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_1 :: EU Active */
#define rkl__l3_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_1 :: EU Stall */
#define rkl__l3_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_1 :: EU Thread Occupancy */
#define rkl__l3_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_1 :: VS Threads Dispatched */
#define rkl__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_1 :: HS Threads Dispatched */
#define rkl__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_1 :: DS Threads Dispatched */
#define rkl__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_1 :: GS Threads Dispatched */
#define rkl__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_1 :: FS Threads Dispatched */
#define rkl__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_1 :: CS Threads Dispatched */
#define rkl__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_1 :: Slice0 L3 Bank0 Input Available */
#define rkl__l3_1__l30_bank0_input_available__read \
   tglgt1__l3_1__l30_bank0_input_available__read

/* L3_1 :: Slice0 L3 Bank1 Input Available */
#define rkl__l3_1__l30_bank1_input_available__read \
   tglgt1__l3_1__l30_bank1_input_available__read

/*
 * rkl__l3_2__* reader aliases.
 *
 * Each macro below expands an rkl__l3_2__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. gs_threads -> hsw__render_basic__vs_threads__read); presumably
 * the generator shares readers whose equations are identical -- confirm
 * against gen_perf.py before "fixing" a target by hand.
 */

/* L3_2 :: GPU Time Elapsed */
#define rkl__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_2 :: GPU Core Clocks */
#define rkl__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_2 :: AVG GPU Core Frequency */
#define rkl__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_2 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_2 :: GPU Busy */
#define rkl__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_2 :: VS Threads Dispatched */
#define rkl__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_2 :: HS Threads Dispatched */
#define rkl__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_2 :: DS Threads Dispatched */
#define rkl__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_2 :: GS Threads Dispatched */
#define rkl__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_2 :: FS Threads Dispatched */
#define rkl__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_2 :: CS Threads Dispatched */
#define rkl__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_2 :: EU Active */
#define rkl__l3_2__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_2 :: EU Stall */
#define rkl__l3_2__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_2 :: EU Thread Occupancy */
#define rkl__l3_2__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_2 :: Slice0 L3 Bank2 Input Available */
#define rkl__l3_2__l30_bank2_input_available__read \
   tglgt1__l3_2__l30_bank2_input_available__read

/* L3_2 :: Slice0 L3 Bank3 Input Available */
#define rkl__l3_2__l30_bank3_input_available__read \
   tglgt1__l3_2__l30_bank3_input_available__read

/*
 * rkl__l3_3__* reader aliases.
 *
 * Each macro below expands an rkl__l3_3__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. gs_threads -> hsw__render_basic__vs_threads__read); presumably
 * the generator shares readers whose equations are identical -- confirm
 * against gen_perf.py before "fixing" a target by hand.
 */

/* L3_3 :: GPU Time Elapsed */
#define rkl__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_3 :: GPU Core Clocks */
#define rkl__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_3 :: AVG GPU Core Frequency */
#define rkl__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_3 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_3 :: GPU Busy */
#define rkl__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_3 :: VS Threads Dispatched */
#define rkl__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_3 :: HS Threads Dispatched */
#define rkl__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_3 :: DS Threads Dispatched */
#define rkl__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_3 :: GS Threads Dispatched */
#define rkl__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_3 :: FS Threads Dispatched */
#define rkl__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_3 :: CS Threads Dispatched */
#define rkl__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_3 :: EU Active */
#define rkl__l3_3__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_3 :: EU Stall */
#define rkl__l3_3__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_3 :: EU Thread Occupancy */
#define rkl__l3_3__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_3 :: Slice0 L3 Bank0 Output Ready */
#define rkl__l3_3__l30_bank0_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/*
 * rkl__l3_4__* reader aliases.
 *
 * Each macro below expands an rkl__l3_4__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. gs_threads -> hsw__render_basic__vs_threads__read); presumably
 * the generator shares readers whose equations are identical -- confirm
 * against gen_perf.py before "fixing" a target by hand.
 */

/* L3_4 :: GPU Time Elapsed */
#define rkl__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_4 :: GPU Core Clocks */
#define rkl__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_4 :: AVG GPU Core Frequency */
#define rkl__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_4 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_4 :: GPU Busy */
#define rkl__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_4 :: VS Threads Dispatched */
#define rkl__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_4 :: HS Threads Dispatched */
#define rkl__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_4 :: DS Threads Dispatched */
#define rkl__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_4 :: GS Threads Dispatched */
#define rkl__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_4 :: FS Threads Dispatched */
#define rkl__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_4 :: CS Threads Dispatched */
#define rkl__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_4 :: EU Active */
#define rkl__l3_4__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_4 :: EU Stall */
#define rkl__l3_4__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_4 :: EU Thread Occupancy */
#define rkl__l3_4__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_4 :: Slice0 L3 Bank1 Output Ready */
#define rkl__l3_4__l30_bank1_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * rkl__l3_5__* reader aliases.
 *
 * Each macro below expands an rkl__l3_5__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. l30_bank2_output_ready ->
 * tglgt1__l3_4__l30_bank1_output_ready__read); presumably the generator shares
 * readers whose equations are identical -- confirm against gen_perf.py before
 * "fixing" a target by hand.
 */

/* L3_5 :: GPU Time Elapsed */
#define rkl__l3_5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_5 :: GPU Core Clocks */
#define rkl__l3_5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_5 :: AVG GPU Core Frequency */
#define rkl__l3_5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_5 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_5 :: GPU Busy */
#define rkl__l3_5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_5 :: VS Threads Dispatched */
#define rkl__l3_5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_5 :: HS Threads Dispatched */
#define rkl__l3_5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_5 :: DS Threads Dispatched */
#define rkl__l3_5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_5 :: GS Threads Dispatched */
#define rkl__l3_5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_5 :: FS Threads Dispatched */
#define rkl__l3_5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_5 :: CS Threads Dispatched */
#define rkl__l3_5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_5 :: EU Active */
#define rkl__l3_5__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_5 :: EU Stall */
#define rkl__l3_5__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_5 :: EU Thread Occupancy */
#define rkl__l3_5__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_5 :: Slice0 L3 Bank2 Output Ready */
#define rkl__l3_5__l30_bank2_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * rkl__l3_6__* reader aliases.
 *
 * Each macro below expands an rkl__l3_6__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. l30_bank3_output_ready ->
 * tglgt1__l3_4__l30_bank1_output_ready__read); presumably the generator shares
 * readers whose equations are identical -- confirm against gen_perf.py before
 * "fixing" a target by hand.
 */

/* L3_6 :: GPU Time Elapsed */
#define rkl__l3_6__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_6 :: GPU Core Clocks */
#define rkl__l3_6__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_6 :: AVG GPU Core Frequency */
#define rkl__l3_6__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_6 :: AVG GPU Core Frequency (__max alias) */
#define rkl__l3_6__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_6 :: GPU Busy */
#define rkl__l3_6__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_6 :: VS Threads Dispatched */
#define rkl__l3_6__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_6 :: HS Threads Dispatched */
#define rkl__l3_6__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_6 :: DS Threads Dispatched */
#define rkl__l3_6__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_6 :: GS Threads Dispatched */
#define rkl__l3_6__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_6 :: FS Threads Dispatched */
#define rkl__l3_6__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_6 :: CS Threads Dispatched */
#define rkl__l3_6__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_6 :: EU Active */
#define rkl__l3_6__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_6 :: EU Stall */
#define rkl__l3_6__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_6 :: EU Thread Occupancy */
#define rkl__l3_6__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_6 :: Slice0 L3 Bank3 Output Ready */
#define rkl__l3_6__l30_bank3_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * rkl__sampler_1__* reader aliases.
 *
 * Each macro below expands an rkl__sampler_1__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. sampler00_output_ready ->
 * bdw__render_basic__sampler1_bottleneck__read); presumably the generator
 * shares readers whose equations are identical -- confirm against gen_perf.py
 * before "fixing" a target by hand.
 */

/* Sampler_1 :: GPU Time Elapsed */
#define rkl__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Sampler_1 :: GPU Core Clocks */
#define rkl__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Sampler_1 :: AVG GPU Core Frequency */
#define rkl__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_1 :: AVG GPU Core Frequency (__max alias) */
#define rkl__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_1 :: GPU Busy */
#define rkl__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Sampler_1 :: VS Threads Dispatched */
#define rkl__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Sampler_1 :: HS Threads Dispatched */
#define rkl__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Sampler_1 :: DS Threads Dispatched */
#define rkl__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Sampler_1 :: GS Threads Dispatched */
#define rkl__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Sampler_1 :: FS Threads Dispatched */
#define rkl__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Sampler_1 :: CS Threads Dispatched */
#define rkl__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Sampler_1 :: EU Active */
#define rkl__sampler_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Sampler_1 :: EU Stall */
#define rkl__sampler_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Sampler_1 :: EU Thread Occupancy */
#define rkl__sampler_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler_1 :: Slice0 DualSubslice0 Input Available */
#define rkl__sampler_1__sampler00_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Sampler_1 :: Slice0 DualSubslice1 Input Available */
#define rkl__sampler_1__sampler01_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Sampler_1 :: Slice0 DualSubslice0 Sampler Output Ready */
#define rkl__sampler_1__sampler00_output_ready__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice1 Sampler Output Ready */
#define rkl__sampler_1__sampler01_output_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/*
 * rkl__tdl_1__* reader aliases.
 *
 * Each macro below expands an rkl__tdl_1__* name to a reader that is named
 * under another prefix (hsw__, bdw__, tglgt1__) instead of defining a new
 * function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. thread_header00_ready ->
 * bdw__render_pipe_profile__so_bottleneck__read); presumably the generator
 * shares readers whose equations are identical -- confirm against gen_perf.py
 * before "fixing" a target by hand.
 */

/* TDL_1 :: GPU Time Elapsed */
#define rkl__tdl_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* TDL_1 :: GPU Core Clocks */
#define rkl__tdl_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* TDL_1 :: AVG GPU Core Frequency */
#define rkl__tdl_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1 :: AVG GPU Core Frequency (__max alias) */
#define rkl__tdl_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1 :: GPU Busy */
#define rkl__tdl_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* TDL_1 :: VS Threads Dispatched */
#define rkl__tdl_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* TDL_1 :: HS Threads Dispatched */
#define rkl__tdl_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* TDL_1 :: DS Threads Dispatched */
#define rkl__tdl_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* TDL_1 :: GS Threads Dispatched */
#define rkl__tdl_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* TDL_1 :: FS Threads Dispatched */
#define rkl__tdl_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* TDL_1 :: CS Threads Dispatched */
#define rkl__tdl_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* TDL_1 :: EU Active */
#define rkl__tdl_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* TDL_1 :: EU Stall */
#define rkl__tdl_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* TDL_1 :: EU Thread Occupancy */
#define rkl__tdl_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher */
#define rkl__tdl_1__non_ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler1_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher */
#define rkl__tdl_1__non_ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher */
#define rkl__tdl_1__ps_thread00_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* TDL_1 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher */
#define rkl__tdl_1__ps_thread01_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0 */
#define rkl__tdl_1__thread_header00_ready_port0__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1 */
#define rkl__tdl_1__thread_header00_ready_port1__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2 */
#define rkl__tdl_1__thread_header00_ready_port2__read \
   bdw__render_pipe_profile__sf_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3 */
#define rkl__tdl_1__thread_header00_ready_port3__read \
   bdw__render_pipe_profile__cl_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0 */
#define rkl__tdl_1__thread_header01_ready_port0__read \
   bdw__render_pipe_profile__so_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1 */
#define rkl__tdl_1__thread_header01_ready_port1__read \
   bdw__render_pipe_profile__ds_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2 */
#define rkl__tdl_1__thread_header01_ready_port2__read \
   bdw__render_pipe_profile__hs_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3 */
#define rkl__tdl_1__thread_header01_ready_port3__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher */
#define rkl__tdl_1__thread_header00_ready__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher */
#define rkl__tdl_1__thread_header01_ready__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/*
 * rkl__gpu_busyness__* reader aliases.
 *
 * Each macro below expands an rkl__gpu_busyness__* name to a reader that is
 * named under another prefix (hsw__, bdw__, tglgt1__) instead of defining a
 * new function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. render_busy ->
 * bdw__render_pipe_profile__hi_depth_bottleneck__read); presumably the
 * generator shares readers whose equations are identical -- confirm against
 * gen_perf.py before "fixing" a target by hand.
 */

/* GpuBusyness :: GPU Time Elapsed */
#define rkl__gpu_busyness__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* GpuBusyness :: GPU Core Clocks */
#define rkl__gpu_busyness__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* GpuBusyness :: AVG GPU Core Frequency */
#define rkl__gpu_busyness__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* GpuBusyness :: AVG GPU Core Frequency (__max alias) */
#define rkl__gpu_busyness__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* GpuBusyness :: GPU Busy */
#define rkl__gpu_busyness__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* GpuBusyness :: EU Active */
#define rkl__gpu_busyness__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* GpuBusyness :: EU Stall */
#define rkl__gpu_busyness__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* GpuBusyness :: EU Thread Occupancy */
#define rkl__gpu_busyness__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* GpuBusyness :: VS Threads Dispatched */
#define rkl__gpu_busyness__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* GpuBusyness :: HS Threads Dispatched */
#define rkl__gpu_busyness__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* GpuBusyness :: DS Threads Dispatched */
#define rkl__gpu_busyness__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* GpuBusyness :: GS Threads Dispatched */
#define rkl__gpu_busyness__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* GpuBusyness :: FS Threads Dispatched */
#define rkl__gpu_busyness__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* GpuBusyness :: CS Threads Dispatched */
#define rkl__gpu_busyness__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* GpuBusyness :: Render Ring Busy */
#define rkl__gpu_busyness__render_busy__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* GpuBusyness :: Compute Ring Busy */
#define rkl__gpu_busyness__compute_busy__read \
   bdw__render_pipe_profile__so_stall__read

/* GpuBusyness :: Posh Ring Busy */
#define rkl__gpu_busyness__posh_engine_busy__read \
   bdw__render_pipe_profile__sf_stall__read

/* GpuBusyness :: Blitter Ring Busy */
#define rkl__gpu_busyness__blitter_busy__read \
   bdw__render_pipe_profile__cl_stall__read

/* GpuBusyness :: Vebox Ring Busy */
#define rkl__gpu_busyness__vebox_busy__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* GpuBusyness :: Vdbox0 Ring Busy */
#define rkl__gpu_busyness__vdbox0_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* GpuBusyness :: Render and compute engines are simultaneously busy */
#define rkl__gpu_busyness__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* GpuBusyness :: Any Engine Busy */
#define rkl__gpu_busyness__any_engine_busy__read \
   bdw__render_basic__sampler1_busy__read

/*
 * rkl__eu_activity1__* reader aliases.
 *
 * Each macro below expands an rkl__eu_activity1__* name to a reader that is
 * named under another prefix (hsw__, bdw__, tglgt1__) instead of defining a
 * new function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. vs_fpu_active -> tglgt1__render_basic__eu_active__read);
 * presumably the generator shares readers whose equations are identical --
 * confirm against gen_perf.py before "fixing" a target by hand.
 */

/* EuActivity1 :: GPU Time Elapsed */
#define rkl__eu_activity1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity1 :: GPU Core Clocks */
#define rkl__eu_activity1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity1 :: AVG GPU Core Frequency */
#define rkl__eu_activity1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity1 :: AVG GPU Core Frequency (__max alias) */
#define rkl__eu_activity1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity1 :: GPU Busy */
#define rkl__eu_activity1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity1 :: VS Threads Dispatched */
#define rkl__eu_activity1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity1 :: HS Threads Dispatched */
#define rkl__eu_activity1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity1 :: DS Threads Dispatched */
#define rkl__eu_activity1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity1 :: GS Threads Dispatched */
#define rkl__eu_activity1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity1 :: FS Threads Dispatched */
#define rkl__eu_activity1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity1 :: CS Threads Dispatched */
#define rkl__eu_activity1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity1 :: Render Ring Busy */
#define rkl__eu_activity1__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity1 :: Compute Ring Busy */
#define rkl__eu_activity1__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity1 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity1__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity1 :: GTI Read Throughput */
#define rkl__eu_activity1__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity1 :: GTI Write Throughput */
#define rkl__eu_activity1__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity1 :: VS FPU Pipe Active */
#define rkl__eu_activity1__vs_fpu_active__read \
   tglgt1__render_basic__eu_active__read

/* EuActivity1 :: PS FPU Pipe Active */
#define rkl__eu_activity1__ps_fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity1 :: EU Send Pipe Active */
#define rkl__eu_activity1__eu_send_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/*
 * rkl__eu_activity2__* reader aliases.
 *
 * Each macro below expands an rkl__eu_activity2__* name to a reader that is
 * named under another prefix (hsw__, bdw__, tglgt1__) instead of defining a
 * new function for the counter.
 * NOTE(review): the target's name does not always match the metric in the
 * comment (e.g. cs_send_active -> tglgt1__render_basic__eu_active__read,
 * cs_em_active -> tglgt1__eu_activity1__eu_send_active__read); presumably the
 * generator shares readers whose equations are identical -- confirm against
 * gen_perf.py before "fixing" a target by hand.
 */

/* EuActivity2 :: GPU Time Elapsed */
#define rkl__eu_activity2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* EuActivity2 :: GPU Core Clocks */
#define rkl__eu_activity2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* EuActivity2 :: AVG GPU Core Frequency */
#define rkl__eu_activity2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity2 :: AVG GPU Core Frequency (__max alias) */
#define rkl__eu_activity2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity2 :: GPU Busy */
#define rkl__eu_activity2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* EuActivity2 :: VS Threads Dispatched */
#define rkl__eu_activity2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* EuActivity2 :: HS Threads Dispatched */
#define rkl__eu_activity2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* EuActivity2 :: DS Threads Dispatched */
#define rkl__eu_activity2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* EuActivity2 :: GS Threads Dispatched */
#define rkl__eu_activity2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* EuActivity2 :: FS Threads Dispatched */
#define rkl__eu_activity2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* EuActivity2 :: CS Threads Dispatched */
#define rkl__eu_activity2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* EuActivity2 :: Render Ring Busy */
#define rkl__eu_activity2__render_busy__read \
   bdw__render_pipe_profile__hs_stall__read

/* EuActivity2 :: Compute Ring Busy */
#define rkl__eu_activity2__compute_busy__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity2 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity2__render_and_compute_busy__read \
   bdw__render_basic__sampler0_busy__read

/* EuActivity2 :: GTI Read Throughput */
#define rkl__eu_activity2__gti_read_throughput__read \
   tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity2 :: GTI Write Throughput */
#define rkl__eu_activity2__gti_write_throughput__read \
   tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity2 :: CS EM Pipe Active */
#define rkl__eu_activity2__cs_em_active__read \
   tglgt1__eu_activity1__eu_send_active__read

/* EuActivity2 :: CS FPU Pipe Active */
#define rkl__eu_activity2__cs_fpu_active__read \
   tglgt1__render_basic__eu_stall__read

/* EuActivity2 :: CS Send Pipeline Active */
#define rkl__eu_activity2__cs_send_active__read \
   tglgt1__render_basic__eu_active__read

/* rkl EuActivity3 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (gs_threads, for
 * instance, expands to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity3 :: GPU Time Elapsed */
#define rkl__eu_activity3__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity3 :: GPU Core Clocks */
#define rkl__eu_activity3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity3 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity3 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity3 :: GPU Busy */
#define rkl__eu_activity3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity3 :: VS Threads Dispatched */
#define rkl__eu_activity3__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity3 :: HS Threads Dispatched */
#define rkl__eu_activity3__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity3 :: DS Threads Dispatched */
#define rkl__eu_activity3__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity3 :: GS Threads Dispatched */
#define rkl__eu_activity3__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity3 :: FS Threads Dispatched */
#define rkl__eu_activity3__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity3 :: CS Threads Dispatched */
#define rkl__eu_activity3__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity3 :: Render Ring Busy */
#define rkl__eu_activity3__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity3 :: Compute Ring Busy */
#define rkl__eu_activity3__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity3 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity3__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity3 :: GTI Read Throughput */
#define rkl__eu_activity3__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity3 :: GTI Write Throughput */
#define rkl__eu_activity3__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity3 :: VS EM Pipe Active */
#define rkl__eu_activity3__vs_em_active__read tglgt1__render_basic__eu_active__read

/* EuActivity3 :: PS EM Pipe Active */
#define rkl__eu_activity3__ps_em_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity3 :: PS Send Pipeline Active */
#define rkl__eu_activity3__ps_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* rkl EuActivity4 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (gs_threads, for
 * instance, expands to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity4 :: GPU Time Elapsed */
#define rkl__eu_activity4__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity4 :: GPU Core Clocks */
#define rkl__eu_activity4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity4 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity4 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity4 :: GPU Busy */
#define rkl__eu_activity4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity4 :: VS Threads Dispatched */
#define rkl__eu_activity4__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity4 :: HS Threads Dispatched */
#define rkl__eu_activity4__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity4 :: DS Threads Dispatched */
#define rkl__eu_activity4__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity4 :: GS Threads Dispatched */
#define rkl__eu_activity4__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity4 :: FS Threads Dispatched */
#define rkl__eu_activity4__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity4 :: CS Threads Dispatched */
#define rkl__eu_activity4__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity4 :: Render Ring Busy */
#define rkl__eu_activity4__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity4 :: Compute Ring Busy */
#define rkl__eu_activity4__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity4 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity4__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity4 :: GTI Read Throughput */
#define rkl__eu_activity4__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity4 :: GTI Write Throughput */
#define rkl__eu_activity4__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity4 :: HS FPU Pipe Active */
#define rkl__eu_activity4__hs_fpu_active__read tglgt1__render_basic__eu_active__read

/* EuActivity4 :: DS FPU Pipe Active */
#define rkl__eu_activity4__ds_fpu_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity4 :: VS Send Pipe Active */
#define rkl__eu_activity4__vs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* rkl EuActivity5 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (gs_threads, for
 * instance, expands to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity5 :: GPU Time Elapsed */
#define rkl__eu_activity5__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity5 :: GPU Core Clocks */
#define rkl__eu_activity5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity5 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity5 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity5 :: GPU Busy */
#define rkl__eu_activity5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity5 :: VS Threads Dispatched */
#define rkl__eu_activity5__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity5 :: HS Threads Dispatched */
#define rkl__eu_activity5__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity5 :: DS Threads Dispatched */
#define rkl__eu_activity5__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity5 :: GS Threads Dispatched */
#define rkl__eu_activity5__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity5 :: FS Threads Dispatched */
#define rkl__eu_activity5__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity5 :: CS Threads Dispatched */
#define rkl__eu_activity5__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity5 :: Render Ring Busy */
#define rkl__eu_activity5__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity5 :: Compute Ring Busy */
#define rkl__eu_activity5__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity5 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity5__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity5 :: GTI Read Throughput */
#define rkl__eu_activity5__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity5 :: GTI Write Throughput */
#define rkl__eu_activity5__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity5 :: HS EM Pipe Active */
#define rkl__eu_activity5__hs_em_active__read tglgt1__render_basic__eu_active__read

/* EuActivity5 :: DS EM Pipe Active */
#define rkl__eu_activity5__ds_em_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity5 :: HS Send Pipe Active */
#define rkl__eu_activity5__hs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* rkl EuActivity6 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (gs_threads, for
 * instance, expands to hsw__render_basic__vs_threads__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity6 :: GPU Time Elapsed */
#define rkl__eu_activity6__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity6 :: GPU Core Clocks */
#define rkl__eu_activity6__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity6 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity6__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity6 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity6__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity6 :: GPU Busy */
#define rkl__eu_activity6__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity6 :: VS Threads Dispatched */
#define rkl__eu_activity6__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity6 :: HS Threads Dispatched */
#define rkl__eu_activity6__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity6 :: DS Threads Dispatched */
#define rkl__eu_activity6__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity6 :: GS Threads Dispatched */
#define rkl__eu_activity6__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity6 :: FS Threads Dispatched */
#define rkl__eu_activity6__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity6 :: CS Threads Dispatched */
#define rkl__eu_activity6__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity6 :: Render Ring Busy */
#define rkl__eu_activity6__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity6 :: Compute Ring Busy */
#define rkl__eu_activity6__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity6 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity6__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity6 :: GTI Read Throughput */
#define rkl__eu_activity6__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity6 :: GTI Write Throughput */
#define rkl__eu_activity6__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity6 :: GS FPU Pipe Active */
#define rkl__eu_activity6__gs_fpu_active__read tglgt1__render_basic__eu_active__read

/* EuActivity6 :: GS EM Pipe Active */
#define rkl__eu_activity6__gs_em_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity6 :: GS Send Pipe Active */
#define rkl__eu_activity6__gs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* rkl EuActivity7 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (fpu_active, for
 * instance, expands to tglgt1__render_basic__eu_stall__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity7 :: GPU Time Elapsed */
#define rkl__eu_activity7__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity7 :: GPU Core Clocks */
#define rkl__eu_activity7__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity7 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity7__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity7 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity7__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity7 :: GPU Busy */
#define rkl__eu_activity7__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity7 :: VS Threads Dispatched */
#define rkl__eu_activity7__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity7 :: HS Threads Dispatched */
#define rkl__eu_activity7__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity7 :: DS Threads Dispatched */
#define rkl__eu_activity7__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity7 :: GS Threads Dispatched */
#define rkl__eu_activity7__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity7 :: FS Threads Dispatched */
#define rkl__eu_activity7__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity7 :: CS Threads Dispatched */
#define rkl__eu_activity7__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity7 :: EU FPU Pipe Active */
#define rkl__eu_activity7__fpu_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity7 :: EM Pipe Active */
#define rkl__eu_activity7__em_active__read tglgt1__eu_activity1__eu_send_active__read

/* EuActivity7 :: EU FPU And EM Pipes Active */
#define rkl__eu_activity7__eu_fpu_em_active__read tglgt1__render_basic__eu_active__read

/* EuActivity7 :: EU AVG IPC Rate */
#define rkl__eu_activity7__eu_avg_ipc_rate__read tglgt1__eu_activity7__eu_avg_ipc_rate__read

/* EuActivity7 :: Render Ring Busy */
#define rkl__eu_activity7__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity7 :: Compute Ring Busy */
#define rkl__eu_activity7__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity7 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity7__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity7 :: GTI Read Throughput */
#define rkl__eu_activity7__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity7 :: GTI Write Throughput */
#define rkl__eu_activity7__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* rkl EuActivity8 counter accessors.
 *
 * Nothing in this group defines a function: every name is an object-like
 * macro that expands to a reader symbol carrying another platform/metric-set
 * prefix. The target's name need not describe the counter (ds_send_active,
 * for instance, expands to tglgt1__render_basic__eu_active__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* EuActivity8 :: GPU Time Elapsed */
#define rkl__eu_activity8__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity8 :: GPU Core Clocks */
#define rkl__eu_activity8__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity8 :: AVG GPU Core Frequency (__read accessor) */
#define rkl__eu_activity8__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity8 :: AVG GPU Core Frequency (__max accessor) */
#define rkl__eu_activity8__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity8 :: GPU Busy */
#define rkl__eu_activity8__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity8 :: VS Threads Dispatched */
#define rkl__eu_activity8__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity8 :: HS Threads Dispatched */
#define rkl__eu_activity8__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity8 :: DS Threads Dispatched */
#define rkl__eu_activity8__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity8 :: GS Threads Dispatched */
#define rkl__eu_activity8__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity8 :: FS Threads Dispatched */
#define rkl__eu_activity8__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity8 :: CS Threads Dispatched */
#define rkl__eu_activity8__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity8 :: Render Ring Busy */
#define rkl__eu_activity8__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity8 :: Compute Ring Busy */
#define rkl__eu_activity8__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity8 :: Render and compute engines are simultaneously busy */
#define rkl__eu_activity8__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity8 :: GTI Read Throughput */
#define rkl__eu_activity8__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity8 :: GTI Write Throughput */
#define rkl__eu_activity8__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity8 :: DS Send Pipe Active */
#define rkl__eu_activity8__ds_send_active__read tglgt1__render_basic__eu_active__read

/* rkl TestOa counter accessors.
 *
 * Every name below is an object-like macro, not a function. counter0 through
 * counter9 expand to hsw__compute_extended__* reader symbols whose names bear
 * no relation to the "TestCounterN" labels.
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* Metric set TestOa :: GPU Time Elapsed */
#define rkl__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* Metric set TestOa :: GPU Core Clocks */
#define rkl__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Metric set TestOa :: AVG GPU Core Frequency (__read accessor) */
#define rkl__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TestOa :: AVG GPU Core Frequency (__max accessor) */
#define rkl__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set TestOa :: TestCounter0 */
#define rkl__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* Metric set TestOa :: TestCounter1 */
#define rkl__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* Metric set TestOa :: TestCounter2 */
#define rkl__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* Metric set TestOa :: TestCounter3 */
#define rkl__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* Metric set TestOa :: TestCounter4 */
#define rkl__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* Metric set TestOa :: TestCounter5 */
#define rkl__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* Metric set TestOa :: TestCounter6 */
#define rkl__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* Metric set TestOa :: TestCounter7 */
#define rkl__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* Metric set TestOa :: TestCounter8 */
#define rkl__test_oa__counter8__read hsw__compute_extended__typed_writes0__read

/* Metric set TestOa :: TestCounter9 - OAR enable */
#define rkl__test_oa__counter9__read hsw__compute_extended__untyped_writes0__read

/* dg1 "Render Metrics Basic" counter accessors.
 *
 * Every name below is an object-like macro, not a function: it expands to a
 * reader symbol carrying an hsw/bdw/icl/tglgt1 prefix. Several targets are
 * named after a different counter (shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read; sampler00_bottleneck and
 * sampler_bottleneck both expand to bdw__render_basic__sampler1_busy__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* Render Metrics Basic set :: GPU Time Elapsed */
#define dg1__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* Render Metrics Basic set :: GPU Core Clocks */
#define dg1__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (__read accessor) */
#define dg1__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics Basic set :: AVG GPU Core Frequency (__max accessor) */
#define dg1__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics Basic set :: VS Threads Dispatched */
#define dg1__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* Render Metrics Basic set :: HS Threads Dispatched */
#define dg1__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* Render Metrics Basic set :: DS Threads Dispatched */
#define dg1__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* Render Metrics Basic set :: GS Threads Dispatched */
#define dg1__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* Render Metrics Basic set :: FS Threads Dispatched */
#define dg1__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* Render Metrics Basic set :: CS Threads Dispatched */
#define dg1__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Metrics Basic set :: GPU Busy */
#define dg1__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Render Metrics Basic set :: EU Active */
#define dg1__render_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* Render Metrics Basic set :: EU Stall */
#define dg1__render_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* Render Metrics Basic set :: EU Thread Occupancy */
#define dg1__render_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Render Metrics Basic set :: Sampler00 Busy */
#define dg1__render_basic__sampler00_busy__read bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Sampler Slice0 Dualsubslice0 is bottleneck */
#define dg1__render_basic__sampler00_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Samplers Busy */
#define dg1__render_basic__samplers_busy__read bdw__render_basic__sampler0_busy__read

/* Render Metrics Basic set :: Samplers Bottleneck */
#define dg1__render_basic__sampler_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Render Metrics Basic set :: Rasterized Pixels */
#define dg1__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Render Metrics Basic set :: Early Hi-Depth Test Fails */
#define dg1__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics Basic set :: Early Depth Test Fails */
#define dg1__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: Samples Killed in FS */
#define dg1__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics Basic set :: Pixels Failing Tests */
#define dg1__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics Basic set :: Samples Written */
#define dg1__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Render Metrics Basic set :: Samples Blended */
#define dg1__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Render Metrics Basic set :: Sampler Texels */
#define dg1__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Render Metrics Basic set :: Sampler Texels Misses */
#define dg1__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Render Metrics Basic set :: SLM Bytes Read */
#define dg1__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Render Metrics Basic set :: SLM Bytes Written */
#define dg1__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Render Metrics Basic set :: Shader Memory Accesses */
#define dg1__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Render Metrics Basic set :: Shader Atomic Memory Accesses */
#define dg1__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Render Metrics Basic set :: L3 Shader Throughput */
#define dg1__render_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Render Metrics Basic set :: Shader Barrier Messages */
#define dg1__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Render Metrics Basic set :: GTI Read Throughput */
#define dg1__render_basic__gti_read_throughput__read tglgt1__render_basic__gti_read_throughput__read

/* Render Metrics Basic set :: GTI Write Throughput */
#define dg1__render_basic__gti_write_throughput__read tglgt1__render_basic__gti_write_throughput__read

/* dg1 "Compute Metrics Basic" counter accessors.
 *
 * Every name below is an object-like macro, not a function: it expands to a
 * reader symbol carrying an hsw/bdw/icl/tglgt1 prefix. Some targets are named
 * after a different counter (shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read).
 * NOTE(review): presumably the generator reuses readers on purpose -- confirm
 * in gen_perf.py before treating a name mismatch as a bug.
 */

/* Compute Metrics Basic set :: GPU Time Elapsed */
#define dg1__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* Compute Metrics Basic set :: GPU Core Clocks */
#define dg1__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (__read accessor) */
#define dg1__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Compute Metrics Basic set :: AVG GPU Core Frequency (__max accessor) */
#define dg1__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Compute Metrics Basic set :: GPU Busy */
#define dg1__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Compute Metrics Basic set :: VS Threads Dispatched */
#define dg1__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: HS Threads Dispatched */
#define dg1__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* Compute Metrics Basic set :: DS Threads Dispatched */
#define dg1__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* Compute Metrics Basic set :: GS Threads Dispatched */
#define dg1__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* Compute Metrics Basic set :: FS Threads Dispatched */
#define dg1__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* Compute Metrics Basic set :: CS Threads Dispatched */
#define dg1__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* Compute Metrics Basic set :: EU Active */
#define dg1__compute_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* Compute Metrics Basic set :: EU Stall */
#define dg1__compute_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* Compute Metrics Basic set :: EU Thread Occupancy */
#define dg1__compute_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Compute Metrics Basic set :: Rasterized Pixels */
#define dg1__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Compute Metrics Basic set :: Early Hi-Depth Test Fails */
#define dg1__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Compute Metrics Basic set :: Early Depth Test Fails */
#define dg1__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: Samples Killed in FS */
#define dg1__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Compute Metrics Basic set :: Pixels Failing Tests */
#define dg1__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Compute Metrics Basic set :: Samples Written */
#define dg1__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Compute Metrics Basic set :: Samples Blended */
#define dg1__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Compute Metrics Basic set :: Sampler Texels */
#define dg1__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Compute Metrics Basic set :: Sampler Texels Misses */
#define dg1__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* Compute Metrics Basic set :: SLM Bytes Read */
#define dg1__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* Compute Metrics Basic set :: SLM Bytes Written */
#define dg1__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Compute Metrics Basic set :: Shader Memory Accesses */
#define dg1__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Compute Metrics Basic set :: Shader Atomic Memory Accesses */
#define dg1__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* Compute Metrics Basic set :: L3 Shader Throughput */
#define dg1__compute_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Compute Metrics Basic set :: Shader Barrier Messages */
#define dg1__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Compute Metrics Basic set :: GTI Read Throughput */
#define dg1__compute_basic__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* Compute Metrics Basic set :: GTI Write Throughput */
#define dg1__compute_basic__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* Render Metrics for 3D Pipeline Profile :: GPU Time Elapsed */
#define dg1__render_pipe_profile__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Render Metrics for 3D Pipeline Profile :: GPU Core Clocks */
#define dg1__render_pipe_profile__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define dg1__render_pipe_profile__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Render Metrics for 3D Pipeline Profile :: AVG GPU Core Frequency */
#define dg1__render_pipe_profile__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Render Metrics for 3D Pipeline Profile :: GPU Busy */
#define dg1__render_pipe_profile__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Render Metrics for 3D Pipeline Profile :: VS Threads Dispatched */
#define dg1__render_pipe_profile__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: HS Threads Dispatched */
#define dg1__render_pipe_profile__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Render Metrics for 3D Pipeline Profile :: DS Threads Dispatched */
#define dg1__render_pipe_profile__ds_threads__read \
   bdw__render_basic__ds_threads__read

/*
 * DG1 "Render Metrics for 3D Pipeline Profile" reader aliases.
 *
 * Each macro below expands to the name of an existing reader, so no new
 * function is emitted for these counters.  The target's name comes from a
 * different platform/metric set and frequently does not describe this
 * metric (e.g. shader_barriers expands to
 * hsw__render_basic__early_depth_test_fails__read); rely on the comment
 * above each macro, not on the target's name.
 * NOTE(review): presumably gen_perf.py reuses readers whose equations are
 * identical -- confirm against the generator.
 */

/* Render Metrics for 3D Pipeline Profile :: GS Threads Dispatched */
#define dg1__render_pipe_profile__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Render Metrics for 3D Pipeline Profile :: FS Threads Dispatched */
#define dg1__render_pipe_profile__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Render Metrics for 3D Pipeline Profile :: CS Threads Dispatched */
#define dg1__render_pipe_profile__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Render Metrics for 3D Pipeline Profile :: EU Active */
#define dg1__render_pipe_profile__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Render Metrics for 3D Pipeline Profile :: EU Stall */
#define dg1__render_pipe_profile__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Render Metrics for 3D Pipeline Profile :: EU Thread Occupancy */
#define dg1__render_pipe_profile__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Render Metrics for 3D Pipeline Profile :: Rasterized Pixels */
#define dg1__render_pipe_profile__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Render Metrics for 3D Pipeline Profile :: Early Hi-Depth Test Fails */
#define dg1__render_pipe_profile__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Early Depth Test Fails */
#define dg1__render_pipe_profile__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: Samples Killed in FS */
#define dg1__render_pipe_profile__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Render Metrics for 3D Pipeline Profile :: Pixels Failing Tests */
#define dg1__render_pipe_profile__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Render Metrics for 3D Pipeline Profile :: Samples Written */
#define dg1__render_pipe_profile__samples_written__read \
   bdw__render_basic__samples_written__read

/* Render Metrics for 3D Pipeline Profile :: Samples Blended */
#define dg1__render_pipe_profile__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels */
#define dg1__render_pipe_profile__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Render Metrics for 3D Pipeline Profile :: Sampler Texels Misses */
#define dg1__render_pipe_profile__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Read */
#define dg1__render_pipe_profile__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Render Metrics for 3D Pipeline Profile :: SLM Bytes Written */
#define dg1__render_pipe_profile__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Render Metrics for 3D Pipeline Profile :: Shader Memory Accesses */
#define dg1__render_pipe_profile__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Render Metrics for 3D Pipeline Profile :: Shader Atomic Memory Accesses */
#define dg1__render_pipe_profile__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Render Metrics for 3D Pipeline Profile :: L3 Shader Throughput */
#define dg1__render_pipe_profile__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Render Metrics for 3D Pipeline Profile :: Shader Barrier Messages */
#define dg1__render_pipe_profile__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Render Metrics for 3D Pipeline Profile :: VF Bottleneck */
#define dg1__render_pipe_profile__vf_bottleneck__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: VS Bottleneck */
#define dg1__render_pipe_profile__vs_bottleneck__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: HS Bottleneck */
#define dg1__render_pipe_profile__hs_bottleneck__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: DS Bottleneck */
#define dg1__render_pipe_profile__ds_bottleneck__read \
   bdw__render_pipe_profile__sf_stall__read

/* Render Metrics for 3D Pipeline Profile :: GS Bottleneck */
#define dg1__render_pipe_profile__gs_bottleneck__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: SO Bottleneck */
#define dg1__render_pipe_profile__so_bottleneck__read \
   bdw__render_pipe_profile__cl_stall__read

/* Render Metrics for 3D Pipeline Profile :: Clipper Bottleneck */
#define dg1__render_pipe_profile__cl_bottleneck__read \
   bdw__render_pipe_profile__so_stall__read

/* Render Metrics for 3D Pipeline Profile :: Strip-Fans Bottleneck */
#define dg1__render_pipe_profile__sf_bottleneck__read \
   bdw__render_pipe_profile__ds_stall__read

/* Render Metrics for 3D Pipeline Profile :: Hi-Depth Bottleneck */
#define dg1__render_pipe_profile__hi_depth_bottleneck__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: BC Bottleneck */
#define dg1__render_pipe_profile__bc_bottleneck__read \
   bdw__render_pipe_profile__hs_stall__read

/* Render Metrics for 3D Pipeline Profile :: HS Stall */
#define dg1__render_pipe_profile__hs_stall__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: DS Stall */
#define dg1__render_pipe_profile__ds_stall__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: SO Stall */
#define dg1__render_pipe_profile__so_stall__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Render Metrics for 3D Pipeline Profile :: CL Stall */
#define dg1__render_pipe_profile__cl_stall__read \
   bdw__render_basic__sampler1_busy__read

/* Render Metrics for 3D Pipeline Profile :: SF Stall */
#define dg1__render_pipe_profile__sf_stall__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/*
 * DG1 "HDCAndSF" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader; the target's name
 * belongs to another platform/metric set and may not describe this metric
 * (e.g. non_sampler_shader00_access_stalled_on_l3 expands to
 * icl__tdl_2__non_ps_thread03_ready_for_dispatch__read).  Rely on the
 * comment above each macro for the metric's meaning.
 * NOTE(review): presumably the generator reuses readers with identical
 * equations -- confirm in gen_perf.py.
 */

/* Metric set HDCAndSF (DG1) :: GPU Time Elapsed */
#define dg1__hdc_and_sf__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set HDCAndSF (DG1) :: GPU Core Clocks */
#define dg1__hdc_and_sf__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set HDCAndSF (DG1) :: AVG GPU Core Frequency */
#define dg1__hdc_and_sf__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set HDCAndSF (DG1) :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__hdc_and_sf__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set HDCAndSF (DG1) :: GPU Busy */
#define dg1__hdc_and_sf__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set HDCAndSF (DG1) :: VS Threads Dispatched */
#define dg1__hdc_and_sf__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set HDCAndSF (DG1) :: HS Threads Dispatched */
#define dg1__hdc_and_sf__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set HDCAndSF (DG1) :: DS Threads Dispatched */
#define dg1__hdc_and_sf__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set HDCAndSF (DG1) :: GS Threads Dispatched */
#define dg1__hdc_and_sf__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set HDCAndSF (DG1) :: FS Threads Dispatched */
#define dg1__hdc_and_sf__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set HDCAndSF (DG1) :: CS Threads Dispatched */
#define dg1__hdc_and_sf__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set HDCAndSF (DG1) :: EU Active */
#define dg1__hdc_and_sf__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set HDCAndSF (DG1) :: EU Stall */
#define dg1__hdc_and_sf__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set HDCAndSF (DG1) :: EU Thread Occupancy */
#define dg1__hdc_and_sf__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set HDCAndSF (DG1) :: Rasterized Pixels */
#define dg1__hdc_and_sf__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set HDCAndSF (DG1) :: Early Hi-Depth Test Fails */
#define dg1__hdc_and_sf__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set HDCAndSF (DG1) :: Early Depth Test Fails */
#define dg1__hdc_and_sf__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF (DG1) :: Samples Killed in FS */
#define dg1__hdc_and_sf__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set HDCAndSF (DG1) :: Pixels Failing Tests */
#define dg1__hdc_and_sf__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set HDCAndSF (DG1) :: Samples Written */
#define dg1__hdc_and_sf__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set HDCAndSF (DG1) :: Samples Blended */
#define dg1__hdc_and_sf__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set HDCAndSF (DG1) :: Sampler Texels */
#define dg1__hdc_and_sf__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set HDCAndSF (DG1) :: Sampler Texels Misses */
#define dg1__hdc_and_sf__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set HDCAndSF (DG1) :: SLM Bytes Read */
#define dg1__hdc_and_sf__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set HDCAndSF (DG1) :: SLM Bytes Written */
#define dg1__hdc_and_sf__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set HDCAndSF (DG1) :: Shader Memory Accesses */
#define dg1__hdc_and_sf__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set HDCAndSF (DG1) :: Shader Atomic Memory Accesses */
#define dg1__hdc_and_sf__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set HDCAndSF (DG1) :: L3 Shader Throughput */
#define dg1__hdc_and_sf__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set HDCAndSF (DG1) :: Shader Barrier Messages */
#define dg1__hdc_and_sf__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3 */
#define dg1__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read \
   icl__tdl_2__non_ps_thread03_ready_for_dispatch__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3 */
#define dg1__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read \
   icl__tdl_2__non_ps_thread04_ready_for_dispatch__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3 */
#define dg1__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read \
   icl__tdl_2__non_ps_thread05_ready_for_dispatch__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3 */
#define dg1__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read \
   icl__tdl_2__non_ps_thread06_ready_for_dispatch__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3 */
#define dg1__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read \
   icl__tdl_2__non_ps_thread07_ready_for_dispatch__read

/* Metric set HDCAndSF (DG1) :: Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3
 *
 * Evaluates (GPU_CLOCK[0] - C[7]) * 100 / GpuCoreClocks from the accumulated
 * counters in `results`, using the offsets stored in `query`.  Returns 0 when
 * the core-clock reader yields zero, so no division by zero takes place.
 * The x100 scaling suggests a percentage of core clocks (NOTE(review):
 * confirm against the metric definition).
 */
static float
dg1__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read(UNUSED struct intel_perf_config *perf,
                                                                 const struct intel_perf_query_info *query,
                                                                 const struct intel_perf_query_result *results)
{
   /* RPN equation: GPU_CLOCK 0 READ C 7 READ FSUB 100 UMUL $GpuCoreClocks FDIV */
   const uint64_t gpu_clock = results->accumulator[query->gpu_clock_offset + 0];
   const uint64_t c7 = results->accumulator[query->c_offset + 7];

   /* The difference is taken in uint64_t and only then widened to double, so
    * it wraps around if C7 ever exceeds GPU_CLOCK.
    */
   const double clock_delta = gpu_clock - c7;

   /* UMUL: the scaled value is truncated back to an unsigned integer. */
   const uint64_t scaled = clock_delta * 100;

   const double core_clocks = dg1__hdc_and_sf__gpu_core_clocks__read(perf, query, results);

   if (!core_clocks)
      return 0;

   return (double)scaled / core_clocks;
}

/*
 * Remaining DG1 "HDCAndSF" reader aliases.  Each macro expands to the name
 * of an existing bdw__render_basic__sampler* reader; the target's name does
 * not describe the metric, so rely on the comment above each macro.
 */

/* Metric set HDCAndSF (DG1) :: Polygon Data Ready */
#define dg1__hdc_and_sf__poly_data_ready__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set HDCAndSF (DG1) :: Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define dg1__hdc_and_sf__pixel_data00_ready__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Metric set HDCAndSF (DG1) :: Slice0 Pipe1 Post-EarlyZ Pixel Data Ready */
#define dg1__hdc_and_sf__pixel_data01_ready__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Metric set HDCAndSF (DG1) :: Slice0 Pipe2 Post-EarlyZ Pixel Data Ready */
#define dg1__hdc_and_sf__pixel_data02_ready__read \
   bdw__render_basic__sampler1_busy__read

/*
 * DG1 "RasterizerAndPixelBackend" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader; the target's name
 * belongs to another platform/metric set and may not describe this metric
 * (e.g. gt_request_queue00_full expands to
 * bdw__render_pipe_profile__cl_stall__read).  Rely on the comment above
 * each macro for the metric's meaning.
 */

/* Metric set RasterizerAndPixelBackend :: GPU Time Elapsed */
#define dg1__rasterizer_and_pixel_backend__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Metric set RasterizerAndPixelBackend :: GPU Core Clocks */
#define dg1__rasterizer_and_pixel_backend__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency */
#define dg1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set RasterizerAndPixelBackend :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Metric set RasterizerAndPixelBackend :: GPU Busy */
#define dg1__rasterizer_and_pixel_backend__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Metric set RasterizerAndPixelBackend :: VS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: HS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Metric set RasterizerAndPixelBackend :: DS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Metric set RasterizerAndPixelBackend :: GS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Metric set RasterizerAndPixelBackend :: FS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Metric set RasterizerAndPixelBackend :: CS Threads Dispatched */
#define dg1__rasterizer_and_pixel_backend__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Metric set RasterizerAndPixelBackend :: EU Active */
#define dg1__rasterizer_and_pixel_backend__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Metric set RasterizerAndPixelBackend :: EU Stall */
#define dg1__rasterizer_and_pixel_backend__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Metric set RasterizerAndPixelBackend :: EU Thread Occupancy */
#define dg1__rasterizer_and_pixel_backend__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Metric set RasterizerAndPixelBackend :: Rasterized Pixels */
#define dg1__rasterizer_and_pixel_backend__rasterized_pixels__read \
   bdw__render_basic__rasterized_pixels__read

/* Metric set RasterizerAndPixelBackend :: Early Hi-Depth Test Fails */
#define dg1__rasterizer_and_pixel_backend__hi_depth_test_fails__read \
   bdw__render_basic__hi_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Early Depth Test Fails */
#define dg1__rasterizer_and_pixel_backend__early_depth_test_fails__read \
   bdw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Samples Killed in FS */
#define dg1__rasterizer_and_pixel_backend__samples_killed_in_ps__read \
   bdw__render_basic__samples_killed_in_ps__read

/* Metric set RasterizerAndPixelBackend :: Pixels Failing Tests */
#define dg1__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read \
   bdw__render_basic__pixels_failing_post_ps_tests__read

/* Metric set RasterizerAndPixelBackend :: Samples Written */
#define dg1__rasterizer_and_pixel_backend__samples_written__read \
   bdw__render_basic__samples_written__read

/* Metric set RasterizerAndPixelBackend :: Samples Blended */
#define dg1__rasterizer_and_pixel_backend__samples_blended__read \
   bdw__render_basic__samples_blended__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels */
#define dg1__rasterizer_and_pixel_backend__sampler_texels__read \
   bdw__render_basic__sampler_texels__read

/* Metric set RasterizerAndPixelBackend :: Sampler Texels Misses */
#define dg1__rasterizer_and_pixel_backend__sampler_texel_misses__read \
   bdw__render_basic__sampler_texel_misses__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Read */
#define dg1__rasterizer_and_pixel_backend__slm_bytes_read__read \
   bdw__render_basic__slm_bytes_read__read

/* Metric set RasterizerAndPixelBackend :: SLM Bytes Written */
#define dg1__rasterizer_and_pixel_backend__slm_bytes_written__read \
   bdw__render_basic__slm_bytes_written__read

/* Metric set RasterizerAndPixelBackend :: Shader Memory Accesses */
#define dg1__rasterizer_and_pixel_backend__shader_memory_accesses__read \
   bdw__render_basic__shader_memory_accesses__read

/* Metric set RasterizerAndPixelBackend :: Shader Atomic Memory Accesses */
#define dg1__rasterizer_and_pixel_backend__shader_atomics__read \
   bdw__render_basic__shader_atomics__read

/* Metric set RasterizerAndPixelBackend :: L3 Shader Throughput */
#define dg1__rasterizer_and_pixel_backend__l3_shader_throughput__read \
   icl__render_basic__l3_shader_throughput__read

/* Metric set RasterizerAndPixelBackend :: Shader Barrier Messages */
#define dg1__rasterizer_and_pixel_backend__shader_barriers__read \
   hsw__render_basic__early_depth_test_fails__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Input Available */
#define dg1__rasterizer_and_pixel_backend__rasterizer0_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Rasterizer Output Ready */
#define dg1__rasterizer_and_pixel_backend__rasterizer0_output_ready__read \
   bdw__render_pipe_profile__so_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 PS Output Available */
#define dg1__rasterizer_and_pixel_backend__ps_output00_available__read \
   bdw__render_pipe_profile__ds_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 PS Output Available */
#define dg1__rasterizer_and_pixel_backend__ps_output01_available__read \
   bdw__render_pipe_profile__hs_stall__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe2 PS Output Available */
#define dg1__rasterizer_and_pixel_backend__ps_output02_available__read \
   bdw__render_pipe_profile__vf_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe0 Pixel Values Ready */
#define dg1__rasterizer_and_pixel_backend__pixel_values00_ready__read \
   bdw__render_pipe_profile__early_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe1 Pixel Values Ready */
#define dg1__rasterizer_and_pixel_backend__pixel_values01_ready__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: Slice0 Pipe2 Pixel Values Ready */
#define dg1__rasterizer_and_pixel_backend__pixel_values02_ready__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ00 is full */
#define dg1__rasterizer_and_pixel_backend__gt_request_queue00_full__read \
   bdw__render_pipe_profile__cl_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ01 is full */
#define dg1__rasterizer_and_pixel_backend__gt_request_queue01_full__read \
   bdw__render_pipe_profile__sf_stall__read

/* Metric set RasterizerAndPixelBackend :: SQ10 is full */
#define dg1__rasterizer_and_pixel_backend__gt_request_queue10_full__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Metric set RasterizerAndPixelBackend :: SQ11 is full */
#define dg1__rasterizer_and_pixel_backend__gt_request_queue11_full__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/*
 * DG1 "L3_1" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank4/Bank5 expand
 * to tglgt1__l3_2 bank2/bank3 readers), so rely on the comment above each
 * macro for the metric's meaning.
 */

/* L3_1 :: GPU Time Elapsed */
#define dg1__l3_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_1 :: GPU Core Clocks */
#define dg1__l3_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_1 :: AVG GPU Core Frequency */
#define dg1__l3_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_1 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_1 :: GPU Busy */
#define dg1__l3_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_1 :: EU Active */
#define dg1__l3_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_1 :: EU Stall */
#define dg1__l3_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_1 :: EU Thread Occupancy */
#define dg1__l3_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_1 :: VS Threads Dispatched */
#define dg1__l3_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_1 :: HS Threads Dispatched */
#define dg1__l3_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_1 :: DS Threads Dispatched */
#define dg1__l3_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_1 :: GS Threads Dispatched */
#define dg1__l3_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_1 :: FS Threads Dispatched */
#define dg1__l3_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_1 :: CS Threads Dispatched */
#define dg1__l3_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_1 :: Slice0 L3 Bank0 Input Available */
#define dg1__l3_1__l30_bank0_input_available__read \
   tglgt2__l3_1__l30_bank0_input_available__read

/* L3_1 :: Slice0 L3 Bank1 Input Available */
#define dg1__l3_1__l30_bank1_input_available__read \
   tglgt2__l3_1__l30_bank1_input_available__read

/* L3_1 :: Slice0 L3 Bank4 Input Available */
#define dg1__l3_1__l30_bank4_input_available__read \
   tglgt1__l3_2__l30_bank2_input_available__read

/* L3_1 :: Slice0 L3 Bank5 Input Available */
#define dg1__l3_1__l30_bank5_input_available__read \
   tglgt1__l3_2__l30_bank3_input_available__read

/*
 * DG1 "L3_2" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank6/Bank7 expand
 * to tglgt2__l3_1 bank1/bank0 readers), so rely on the comment above each
 * macro for the metric's meaning.
 */

/* L3_2 :: GPU Time Elapsed */
#define dg1__l3_2__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_2 :: GPU Core Clocks */
#define dg1__l3_2__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_2 :: AVG GPU Core Frequency */
#define dg1__l3_2__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_2 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_2__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_2 :: GPU Busy */
#define dg1__l3_2__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_2 :: VS Threads Dispatched */
#define dg1__l3_2__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_2 :: HS Threads Dispatched */
#define dg1__l3_2__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_2 :: DS Threads Dispatched */
#define dg1__l3_2__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_2 :: GS Threads Dispatched */
#define dg1__l3_2__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_2 :: FS Threads Dispatched */
#define dg1__l3_2__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_2 :: CS Threads Dispatched */
#define dg1__l3_2__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_2 :: EU Active */
#define dg1__l3_2__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_2 :: EU Stall */
#define dg1__l3_2__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_2 :: EU Thread Occupancy */
#define dg1__l3_2__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_2 :: Slice0 L3 Bank2 Input Available */
#define dg1__l3_2__l30_bank2_input_available__read \
   tglgt1__l3_2__l30_bank2_input_available__read

/* L3_2 :: Slice0 L3 Bank3 Input Available */
#define dg1__l3_2__l30_bank3_input_available__read \
   tglgt1__l3_2__l30_bank3_input_available__read

/* L3_2 :: Slice0 L3 Bank6 Input Available */
#define dg1__l3_2__l30_bank6_input_available__read \
   tglgt2__l3_1__l30_bank1_input_available__read

/* L3_2 :: Slice0 L3 Bank7 Input Available */
#define dg1__l3_2__l30_bank7_input_available__read \
   tglgt2__l3_1__l30_bank0_input_available__read

/*
 * DG1 "L3_3" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank4 expands to
 * tglgt1__l3_4__l30_bank1_output_ready__read), so rely on the comment above
 * each macro for the metric's meaning.
 */

/* L3_3 :: GPU Time Elapsed */
#define dg1__l3_3__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_3 :: GPU Core Clocks */
#define dg1__l3_3__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_3 :: AVG GPU Core Frequency */
#define dg1__l3_3__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_3 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_3__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_3 :: GPU Busy */
#define dg1__l3_3__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_3 :: VS Threads Dispatched */
#define dg1__l3_3__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_3 :: HS Threads Dispatched */
#define dg1__l3_3__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_3 :: DS Threads Dispatched */
#define dg1__l3_3__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_3 :: GS Threads Dispatched */
#define dg1__l3_3__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_3 :: FS Threads Dispatched */
#define dg1__l3_3__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_3 :: CS Threads Dispatched */
#define dg1__l3_3__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_3 :: EU Active */
#define dg1__l3_3__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_3 :: EU Stall */
#define dg1__l3_3__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_3 :: EU Thread Occupancy */
#define dg1__l3_3__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_3 :: Slice0 L3 Bank0 Output Ready */
#define dg1__l3_3__l30_bank0_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/* L3_3 :: Slice0 L3 Bank4 Output Ready */
#define dg1__l3_3__l30_bank4_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/*
 * DG1 "L3_4" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank5 expands to
 * tglgt1__l3_3__l30_bank0_output_ready__read), so rely on the comment above
 * each macro for the metric's meaning.
 */

/* L3_4 :: GPU Time Elapsed */
#define dg1__l3_4__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_4 :: GPU Core Clocks */
#define dg1__l3_4__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_4 :: AVG GPU Core Frequency */
#define dg1__l3_4__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_4 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_4__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_4 :: GPU Busy */
#define dg1__l3_4__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_4 :: VS Threads Dispatched */
#define dg1__l3_4__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_4 :: HS Threads Dispatched */
#define dg1__l3_4__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_4 :: DS Threads Dispatched */
#define dg1__l3_4__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_4 :: GS Threads Dispatched */
#define dg1__l3_4__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_4 :: FS Threads Dispatched */
#define dg1__l3_4__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_4 :: CS Threads Dispatched */
#define dg1__l3_4__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_4 :: EU Active */
#define dg1__l3_4__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_4 :: EU Stall */
#define dg1__l3_4__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_4 :: EU Thread Occupancy */
#define dg1__l3_4__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_4 :: Slice0 L3 Bank1 Output Ready */
#define dg1__l3_4__l30_bank1_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_4 :: Slice0 L3 Bank5 Output Ready */
#define dg1__l3_4__l30_bank5_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/*
 * DG1 "L3_5" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank2/Bank6 expand
 * to tglgt1 l3_4 bank1 / l3_3 bank0 readers), so rely on the comment above
 * each macro for the metric's meaning.
 */

/* L3_5 :: GPU Time Elapsed */
#define dg1__l3_5__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_5 :: GPU Core Clocks */
#define dg1__l3_5__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_5 :: AVG GPU Core Frequency */
#define dg1__l3_5__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_5 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_5__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_5 :: GPU Busy */
#define dg1__l3_5__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_5 :: VS Threads Dispatched */
#define dg1__l3_5__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_5 :: HS Threads Dispatched */
#define dg1__l3_5__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_5 :: DS Threads Dispatched */
#define dg1__l3_5__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_5 :: GS Threads Dispatched */
#define dg1__l3_5__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_5 :: FS Threads Dispatched */
#define dg1__l3_5__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_5 :: CS Threads Dispatched */
#define dg1__l3_5__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_5 :: EU Active */
#define dg1__l3_5__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_5 :: EU Stall */
#define dg1__l3_5__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_5 :: EU Thread Occupancy */
#define dg1__l3_5__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_5 :: Slice0 L3 Bank2 Output Ready */
#define dg1__l3_5__l30_bank2_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_5 :: Slice0 L3 Bank6 Output Ready */
#define dg1__l3_5__l30_bank6_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/*
 * DG1 "L3_6" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader.  The bank number in
 * a target's name need not match the bank in the metric (Bank3/Bank7 expand
 * to tglgt1 l3_4 bank1 / l3_3 bank0 readers), so rely on the comment above
 * each macro for the metric's meaning.
 */

/* L3_6 :: GPU Time Elapsed */
#define dg1__l3_6__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* L3_6 :: GPU Core Clocks */
#define dg1__l3_6__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* L3_6 :: AVG GPU Core Frequency */
#define dg1__l3_6__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* L3_6 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__l3_6__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* L3_6 :: GPU Busy */
#define dg1__l3_6__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* L3_6 :: VS Threads Dispatched */
#define dg1__l3_6__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* L3_6 :: HS Threads Dispatched */
#define dg1__l3_6__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* L3_6 :: DS Threads Dispatched */
#define dg1__l3_6__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* L3_6 :: GS Threads Dispatched */
#define dg1__l3_6__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* L3_6 :: FS Threads Dispatched */
#define dg1__l3_6__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* L3_6 :: CS Threads Dispatched */
#define dg1__l3_6__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* L3_6 :: EU Active */
#define dg1__l3_6__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* L3_6 :: EU Stall */
#define dg1__l3_6__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* L3_6 :: EU Thread Occupancy */
#define dg1__l3_6__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* L3_6 :: Slice0 L3 Bank3 Output Ready */
#define dg1__l3_6__l30_bank3_output_ready__read \
   tglgt1__l3_4__l30_bank1_output_ready__read

/* L3_6 :: Slice0 L3 Bank7 Output Ready */
#define dg1__l3_6__l30_bank7_output_ready__read \
   tglgt1__l3_3__l30_bank0_output_ready__read

/*
 * DG1 "Sampler_1" metric set: reader aliases.
 *
 * Each macro expands to the name of an existing reader; the target's name
 * belongs to another platform/metric set and may not describe this metric
 * (e.g. sampler00_input_available expands to
 * bdw__render_pipe_profile__cl_bottleneck__read).  Rely on the comment
 * above each macro for the metric's meaning.
 */

/* Sampler_1 :: GPU Time Elapsed */
#define dg1__sampler_1__gpu_time__read \
   hsw__render_basic__gpu_time__read

/* Sampler_1 :: GPU Core Clocks */
#define dg1__sampler_1__gpu_core_clocks__read \
   bdw__render_basic__gpu_core_clocks__read

/* Sampler_1 :: AVG GPU Core Frequency */
#define dg1__sampler_1__avg_gpu_core_frequency__read \
   bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_1 :: AVG GPU Core Frequency (__max alias; the __read alias is above) */
#define dg1__sampler_1__avg_gpu_core_frequency__max \
   hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_1 :: GPU Busy */
#define dg1__sampler_1__gpu_busy__read \
   bdw__render_basic__gpu_busy__read

/* Sampler_1 :: VS Threads Dispatched */
#define dg1__sampler_1__vs_threads__read \
   bdw__render_basic__vs_threads__read

/* Sampler_1 :: HS Threads Dispatched */
#define dg1__sampler_1__hs_threads__read \
   bdw__render_basic__hs_threads__read

/* Sampler_1 :: DS Threads Dispatched */
#define dg1__sampler_1__ds_threads__read \
   bdw__render_basic__ds_threads__read

/* Sampler_1 :: GS Threads Dispatched */
#define dg1__sampler_1__gs_threads__read \
   hsw__render_basic__vs_threads__read

/* Sampler_1 :: FS Threads Dispatched */
#define dg1__sampler_1__ps_threads__read \
   bdw__render_basic__ps_threads__read

/* Sampler_1 :: CS Threads Dispatched */
#define dg1__sampler_1__cs_threads__read \
   bdw__render_basic__cs_threads__read

/* Sampler_1 :: EU Active */
#define dg1__sampler_1__eu_active__read \
   tglgt1__render_basic__eu_active__read

/* Sampler_1 :: EU Stall */
#define dg1__sampler_1__eu_stall__read \
   tglgt1__render_basic__eu_stall__read

/* Sampler_1 :: EU Thread Occupancy */
#define dg1__sampler_1__eu_thread_occupancy__read \
   tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler_1 :: Slice0 DualSubslice0 Input Available */
#define dg1__sampler_1__sampler00_input_available__read \
   bdw__render_pipe_profile__cl_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice1 Input Available */
#define dg1__sampler_1__sampler01_input_available__read \
   bdw__render_pipe_profile__so_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice2 Input Available */
#define dg1__sampler_1__sampler02_input_available__read \
   bdw__render_basic__sampler1_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice3 Input Available */
#define dg1__sampler_1__sampler03_input_available__read \
   bdw__render_basic__sampler0_bottleneck__read

/* Sampler_1 :: Slice0 DualSubslice4 Input Available */
#define dg1__sampler_1__sampler04_input_available__read \
   bdw__render_basic__sampler1_busy__read

/* Sampler_1 :: Slice0 DualSubslice5 Input Available */
#define dg1__sampler_1__sampler05_input_available__read \
   bdw__render_basic__sampler0_busy__read

/* Sampler_2 metric set: dg1__sampler_2__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. gs_threads -> hsw vs_threads).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* Sampler_2 :: GPU Time Elapsed */
#define dg1__sampler_2__gpu_time__read hsw__render_basic__gpu_time__read

/* Sampler_2 :: GPU Core Clocks */
#define dg1__sampler_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Sampler_2 :: AVG GPU Core Frequency (__read alias) */
#define dg1__sampler_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Sampler_2 :: AVG GPU Core Frequency (__max alias) */
#define dg1__sampler_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* Sampler_2 :: GPU Busy */
#define dg1__sampler_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* Sampler_2 :: VS Threads Dispatched */
#define dg1__sampler_2__vs_threads__read bdw__render_basic__vs_threads__read

/* Sampler_2 :: HS Threads Dispatched */
#define dg1__sampler_2__hs_threads__read bdw__render_basic__hs_threads__read

/* Sampler_2 :: DS Threads Dispatched */
#define dg1__sampler_2__ds_threads__read bdw__render_basic__ds_threads__read

/* Sampler_2 :: GS Threads Dispatched */
#define dg1__sampler_2__gs_threads__read hsw__render_basic__vs_threads__read

/* Sampler_2 :: FS Threads Dispatched */
#define dg1__sampler_2__ps_threads__read bdw__render_basic__ps_threads__read

/* Sampler_2 :: CS Threads Dispatched */
#define dg1__sampler_2__cs_threads__read bdw__render_basic__cs_threads__read

/* Sampler_2 :: EU Active */
#define dg1__sampler_2__eu_active__read tglgt1__render_basic__eu_active__read

/* Sampler_2 :: EU Stall */
#define dg1__sampler_2__eu_stall__read tglgt1__render_basic__eu_stall__read

/* Sampler_2 :: EU Thread Occupancy */
#define dg1__sampler_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler_2 :: Slice0 DualSubslice0 Sampler Output Ready */
#define dg1__sampler_2__sampler00_output_ready__read bdw__render_pipe_profile__cl_bottleneck__read

/* Sampler_2 :: Slice0 DualSubslice1 Sampler Output Ready */
#define dg1__sampler_2__sampler01_output_ready__read bdw__render_pipe_profile__so_bottleneck__read

/* Sampler_2 :: Slice0 DualSubslice2 Sampler Output Ready */
#define dg1__sampler_2__sampler02_output_ready__read bdw__render_basic__sampler1_bottleneck__read

/* Sampler_2 :: Slice0 DualSubslice3 Sampler Output Ready */
#define dg1__sampler_2__sampler03_output_ready__read bdw__render_basic__sampler0_bottleneck__read

/* Sampler_2 :: Slice0 DualSubslice4 Sampler Output Ready */
#define dg1__sampler_2__sampler04_output_ready__read bdw__render_basic__sampler1_busy__read

/* Sampler_2 :: Slice0 DualSubslice5 Sampler Output Ready */
#define dg1__sampler_2__sampler05_output_ready__read bdw__render_basic__sampler0_busy__read

/* TDL_1 metric set: dg1__tdl_1__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. thread_header00_ready -> bdw sf_bottleneck).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* TDL_1 :: GPU Time Elapsed */
#define dg1__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_1 :: GPU Core Clocks */
#define dg1__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_1 :: AVG GPU Core Frequency (__read alias) */
#define dg1__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_1 :: AVG GPU Core Frequency (__max alias) */
#define dg1__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_1 :: GPU Busy */
#define dg1__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_1 :: VS Threads Dispatched */
#define dg1__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_1 :: HS Threads Dispatched */
#define dg1__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_1 :: DS Threads Dispatched */
#define dg1__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_1 :: GS Threads Dispatched */
#define dg1__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_1 :: FS Threads Dispatched */
#define dg1__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_1 :: CS Threads Dispatched */
#define dg1__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_1 :: EU Active */
#define dg1__tdl_1__eu_active__read tglgt1__render_basic__eu_active__read

/* TDL_1 :: EU Stall */
#define dg1__tdl_1__eu_stall__read tglgt1__render_basic__eu_stall__read

/* TDL_1 :: EU Thread Occupancy */
#define dg1__tdl_1__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread03_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread04_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* TDL_1 :: Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher */
#define dg1__tdl_1__non_ps_thread05_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0 */
#define dg1__tdl_1__thread_header00_ready_port0__read bdw__render_pipe_profile__vf_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1 */
#define dg1__tdl_1__thread_header00_ready_port1__read bdw__render_pipe_profile__hs_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2 */
#define dg1__tdl_1__thread_header00_ready_port2__read bdw__render_pipe_profile__ds_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3 */
#define dg1__tdl_1__thread_header00_ready_port3__read bdw__render_pipe_profile__so_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0 */
#define dg1__tdl_1__thread_header01_ready_port0__read bdw__render_pipe_profile__cl_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1 */
#define dg1__tdl_1__thread_header01_ready_port1__read bdw__render_pipe_profile__sf_stall__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2 */
#define dg1__tdl_1__thread_header01_ready_port2__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3 */
#define dg1__tdl_1__thread_header01_ready_port3__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher */
#define dg1__tdl_1__thread_header00_ready__read bdw__render_pipe_profile__sf_bottleneck__read

/* TDL_1 :: Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher */
#define dg1__tdl_1__thread_header01_ready__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* TDL_2 metric set: dg1__tdl_2__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. thread_header05_ready -> bdw sf_bottleneck).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* TDL_2 :: GPU Time Elapsed */
#define dg1__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_2 :: GPU Core Clocks */
#define dg1__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_2 :: AVG GPU Core Frequency (__read alias) */
#define dg1__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_2 :: AVG GPU Core Frequency (__max alias) */
#define dg1__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_2 :: GPU Busy */
#define dg1__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_2 :: VS Threads Dispatched */
#define dg1__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_2 :: HS Threads Dispatched */
#define dg1__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_2 :: DS Threads Dispatched */
#define dg1__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_2 :: GS Threads Dispatched */
#define dg1__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_2 :: FS Threads Dispatched */
#define dg1__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_2 :: CS Threads Dispatched */
#define dg1__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_2 :: EU Active */
#define dg1__tdl_2__eu_active__read tglgt1__render_basic__eu_active__read

/* TDL_2 :: EU Stall */
#define dg1__tdl_2__eu_stall__read tglgt1__render_basic__eu_stall__read

/* TDL_2 :: EU Thread Occupancy */
#define dg1__tdl_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher */
#define dg1__tdl_2__ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher */
#define dg1__tdl_2__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher */
#define dg1__tdl_2__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher */
#define dg1__tdl_2__ps_thread03_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher */
#define dg1__tdl_2__ps_thread04_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher */
#define dg1__tdl_2__ps_thread05_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher */
#define dg1__tdl_2__thread_header05_ready__read bdw__render_pipe_profile__sf_bottleneck__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0 */
#define dg1__tdl_2__thread_header05_ready_port0__read bdw__render_pipe_profile__cl_stall__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1 */
#define dg1__tdl_2__thread_header05_ready_port1__read bdw__render_pipe_profile__sf_stall__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2 */
#define dg1__tdl_2__thread_header05_ready_port2__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3 */
#define dg1__tdl_2__thread_header05_ready_port3__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_3 metric set: dg1__tdl_3__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. thread_header02_ready -> bdw sampler0_busy).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* TDL_3 :: GPU Time Elapsed */
#define dg1__tdl_3__gpu_time__read hsw__render_basic__gpu_time__read

/* TDL_3 :: GPU Core Clocks */
#define dg1__tdl_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* TDL_3 :: AVG GPU Core Frequency (__read alias) */
#define dg1__tdl_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* TDL_3 :: AVG GPU Core Frequency (__max alias) */
#define dg1__tdl_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TDL_3 :: GPU Busy */
#define dg1__tdl_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* TDL_3 :: VS Threads Dispatched */
#define dg1__tdl_3__vs_threads__read bdw__render_basic__vs_threads__read

/* TDL_3 :: HS Threads Dispatched */
#define dg1__tdl_3__hs_threads__read bdw__render_basic__hs_threads__read

/* TDL_3 :: DS Threads Dispatched */
#define dg1__tdl_3__ds_threads__read bdw__render_basic__ds_threads__read

/* TDL_3 :: GS Threads Dispatched */
#define dg1__tdl_3__gs_threads__read hsw__render_basic__vs_threads__read

/* TDL_3 :: FS Threads Dispatched */
#define dg1__tdl_3__ps_threads__read bdw__render_basic__ps_threads__read

/* TDL_3 :: CS Threads Dispatched */
#define dg1__tdl_3__cs_threads__read bdw__render_basic__cs_threads__read

/* TDL_3 :: EU Active */
#define dg1__tdl_3__eu_active__read tglgt1__render_basic__eu_active__read

/* TDL_3 :: EU Stall */
#define dg1__tdl_3__eu_stall__read tglgt1__render_basic__eu_stall__read

/* TDL_3 :: EU Thread Occupancy */
#define dg1__tdl_3__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher */
#define dg1__tdl_3__thread_header02_ready__read bdw__render_basic__sampler0_busy__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher */
#define dg1__tdl_3__thread_header03_ready__read bdw__render_basic__sampler1_busy__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher */
#define dg1__tdl_3__thread_header04_ready__read bdw__render_basic__sampler0_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0 */
#define dg1__tdl_3__thread_header02_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1 */
#define dg1__tdl_3__thread_header02_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2 */
#define dg1__tdl_3__thread_header02_ready_port2__read bdw__render_pipe_profile__sf_stall__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3 */
#define dg1__tdl_3__thread_header02_ready_port3__read bdw__render_pipe_profile__cl_stall__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0 */
#define dg1__tdl_3__thread_header03_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1 */
#define dg1__tdl_3__thread_header03_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2 */
#define dg1__tdl_3__thread_header03_ready_port2__read bdw__render_pipe_profile__hs_stall__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3 */
#define dg1__tdl_3__thread_header03_ready_port3__read bdw__render_pipe_profile__vf_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0 */
#define dg1__tdl_3__thread_header04_ready_port0__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1 */
#define dg1__tdl_3__thread_header04_ready_port1__read bdw__render_pipe_profile__sf_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2 */
#define dg1__tdl_3__thread_header04_ready_port2__read bdw__render_pipe_profile__cl_bottleneck__read

/* TDL_3 :: Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3 */
#define dg1__tdl_3__thread_header04_ready_port3__read bdw__render_pipe_profile__so_bottleneck__read

/* GpuBusyness metric set: dg1__gpu_busyness__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. render_busy -> bdw hi_depth_bottleneck).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* GpuBusyness :: GPU Time Elapsed */
#define dg1__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* GpuBusyness :: GPU Core Clocks */
#define dg1__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* GpuBusyness :: AVG GPU Core Frequency (__read alias) */
#define dg1__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* GpuBusyness :: AVG GPU Core Frequency (__max alias) */
#define dg1__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GpuBusyness :: GPU Busy */
#define dg1__gpu_busyness__gpu_busy__read bdw__render_basic__gpu_busy__read

/* GpuBusyness :: EU Active */
#define dg1__gpu_busyness__eu_active__read tglgt1__render_basic__eu_active__read

/* GpuBusyness :: EU Stall */
#define dg1__gpu_busyness__eu_stall__read tglgt1__render_basic__eu_stall__read

/* GpuBusyness :: EU Thread Occupancy */
#define dg1__gpu_busyness__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* GpuBusyness :: VS Threads Dispatched */
#define dg1__gpu_busyness__vs_threads__read bdw__render_basic__vs_threads__read

/* GpuBusyness :: HS Threads Dispatched */
#define dg1__gpu_busyness__hs_threads__read bdw__render_basic__hs_threads__read

/* GpuBusyness :: DS Threads Dispatched */
#define dg1__gpu_busyness__ds_threads__read bdw__render_basic__ds_threads__read

/* GpuBusyness :: GS Threads Dispatched */
#define dg1__gpu_busyness__gs_threads__read hsw__render_basic__vs_threads__read

/* GpuBusyness :: FS Threads Dispatched */
#define dg1__gpu_busyness__ps_threads__read bdw__render_basic__ps_threads__read

/* GpuBusyness :: CS Threads Dispatched */
#define dg1__gpu_busyness__cs_threads__read bdw__render_basic__cs_threads__read

/* GpuBusyness :: Render Ring Busy */
#define dg1__gpu_busyness__render_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* GpuBusyness :: Compute Ring Busy */
#define dg1__gpu_busyness__compute_busy__read bdw__render_pipe_profile__so_stall__read

/* GpuBusyness :: Posh Ring Busy */
#define dg1__gpu_busyness__posh_engine_busy__read bdw__render_pipe_profile__sf_stall__read

/* GpuBusyness :: Blitter Ring Busy */
#define dg1__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read

/* GpuBusyness :: Vebox Ring Busy */
#define dg1__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* GpuBusyness :: Vdbox0 Ring Busy */
#define dg1__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__hs_stall__read

/* GpuBusyness :: Vdbox1 Ring Busy */
#define dg1__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__ds_stall__read

/* GpuBusyness :: Render and compute engines are simultaneously busy */
#define dg1__gpu_busyness__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GpuBusyness :: Any Engine Busy */
#define dg1__gpu_busyness__any_engine_busy__read bdw__render_basic__sampler1_busy__read

/* EuActivity1 metric set: dg1__eu_activity1__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. vs_fpu_active -> tglgt1 eu_active).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* EuActivity1 :: GPU Time Elapsed */
#define dg1__eu_activity1__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity1 :: GPU Core Clocks */
#define dg1__eu_activity1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity1 :: AVG GPU Core Frequency (__read alias) */
#define dg1__eu_activity1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity1 :: AVG GPU Core Frequency (__max alias) */
#define dg1__eu_activity1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity1 :: GPU Busy */
#define dg1__eu_activity1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity1 :: VS Threads Dispatched */
#define dg1__eu_activity1__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity1 :: HS Threads Dispatched */
#define dg1__eu_activity1__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity1 :: DS Threads Dispatched */
#define dg1__eu_activity1__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity1 :: GS Threads Dispatched */
#define dg1__eu_activity1__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity1 :: FS Threads Dispatched */
#define dg1__eu_activity1__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity1 :: CS Threads Dispatched */
#define dg1__eu_activity1__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity1 :: Render Ring Busy */
#define dg1__eu_activity1__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity1 :: Compute Ring Busy */
#define dg1__eu_activity1__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity1 :: Render and compute engines are simultaneously busy */
#define dg1__eu_activity1__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity1 :: GTI Read Throughput */
#define dg1__eu_activity1__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity1 :: GTI Write Throughput */
#define dg1__eu_activity1__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity1 :: VS FPU Pipe Active */
#define dg1__eu_activity1__vs_fpu_active__read tglgt1__render_basic__eu_active__read

/* EuActivity1 :: PS FPU Pipe Active */
#define dg1__eu_activity1__ps_fpu_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity1 :: EU Send Pipe Active */
#define dg1__eu_activity1__eu_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* EuActivity2 metric set: dg1__eu_activity2__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. cs_send_active -> tglgt1 eu_active).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* EuActivity2 :: GPU Time Elapsed */
#define dg1__eu_activity2__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity2 :: GPU Core Clocks */
#define dg1__eu_activity2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity2 :: AVG GPU Core Frequency (__read alias) */
#define dg1__eu_activity2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity2 :: AVG GPU Core Frequency (__max alias) */
#define dg1__eu_activity2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity2 :: GPU Busy */
#define dg1__eu_activity2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity2 :: VS Threads Dispatched */
#define dg1__eu_activity2__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity2 :: HS Threads Dispatched */
#define dg1__eu_activity2__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity2 :: DS Threads Dispatched */
#define dg1__eu_activity2__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity2 :: GS Threads Dispatched */
#define dg1__eu_activity2__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity2 :: FS Threads Dispatched */
#define dg1__eu_activity2__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity2 :: CS Threads Dispatched */
#define dg1__eu_activity2__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity2 :: Render Ring Busy */
#define dg1__eu_activity2__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity2 :: Compute Ring Busy */
#define dg1__eu_activity2__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity2 :: Render and compute engines are simultaneously busy */
#define dg1__eu_activity2__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity2 :: GTI Read Throughput */
#define dg1__eu_activity2__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity2 :: GTI Write Throughput */
#define dg1__eu_activity2__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity2 :: CS EM Pipe Active */
#define dg1__eu_activity2__cs_em_active__read tglgt1__eu_activity1__eu_send_active__read

/* EuActivity2 :: CS FPU Pipe Active */
#define dg1__eu_activity2__cs_fpu_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity2 :: CS Send Pipeline Active */
#define dg1__eu_activity2__cs_send_active__read tglgt1__render_basic__eu_active__read

/* EuActivity3 metric set: dg1__eu_activity3__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. vs_em_active -> tglgt1 eu_active).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* EuActivity3 :: GPU Time Elapsed */
#define dg1__eu_activity3__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity3 :: GPU Core Clocks */
#define dg1__eu_activity3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity3 :: AVG GPU Core Frequency (__read alias) */
#define dg1__eu_activity3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity3 :: AVG GPU Core Frequency (__max alias) */
#define dg1__eu_activity3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity3 :: GPU Busy */
#define dg1__eu_activity3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity3 :: VS Threads Dispatched */
#define dg1__eu_activity3__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity3 :: HS Threads Dispatched */
#define dg1__eu_activity3__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity3 :: DS Threads Dispatched */
#define dg1__eu_activity3__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity3 :: GS Threads Dispatched */
#define dg1__eu_activity3__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity3 :: FS Threads Dispatched */
#define dg1__eu_activity3__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity3 :: CS Threads Dispatched */
#define dg1__eu_activity3__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity3 :: Render Ring Busy */
#define dg1__eu_activity3__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity3 :: Compute Ring Busy */
#define dg1__eu_activity3__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity3 :: Render and compute engines are simultaneously busy */
#define dg1__eu_activity3__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity3 :: GTI Read Throughput */
#define dg1__eu_activity3__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity3 :: GTI Write Throughput */
#define dg1__eu_activity3__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity3 :: VS EM Pipe Active */
#define dg1__eu_activity3__vs_em_active__read tglgt1__render_basic__eu_active__read

/* EuActivity3 :: PS EM Pipe Active */
#define dg1__eu_activity3__ps_em_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity3 :: PS Send Pipeline Active */
#define dg1__eu_activity3__ps_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* EuActivity4 metric set: dg1__eu_activity4__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. hs_fpu_active -> tglgt1 eu_active).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* EuActivity4 :: GPU Time Elapsed */
#define dg1__eu_activity4__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity4 :: GPU Core Clocks */
#define dg1__eu_activity4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity4 :: AVG GPU Core Frequency (__read alias) */
#define dg1__eu_activity4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity4 :: AVG GPU Core Frequency (__max alias) */
#define dg1__eu_activity4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity4 :: GPU Busy */
#define dg1__eu_activity4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity4 :: VS Threads Dispatched */
#define dg1__eu_activity4__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity4 :: HS Threads Dispatched */
#define dg1__eu_activity4__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity4 :: DS Threads Dispatched */
#define dg1__eu_activity4__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity4 :: GS Threads Dispatched */
#define dg1__eu_activity4__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity4 :: FS Threads Dispatched */
#define dg1__eu_activity4__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity4 :: CS Threads Dispatched */
#define dg1__eu_activity4__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity4 :: Render Ring Busy */
#define dg1__eu_activity4__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity4 :: Compute Ring Busy */
#define dg1__eu_activity4__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity4 :: Render and compute engines are simultaneously busy */
#define dg1__eu_activity4__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity4 :: GTI Read Throughput */
#define dg1__eu_activity4__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity4 :: GTI Write Throughput */
#define dg1__eu_activity4__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity4 :: HS FPU Pipe Active */
#define dg1__eu_activity4__hs_fpu_active__read tglgt1__render_basic__eu_active__read

/* EuActivity4 :: DS FPU Pipe Active */
#define dg1__eu_activity4__ds_fpu_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity4 :: VS Send Pipe Active */
#define dg1__eu_activity4__vs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/* EuActivity5 metric set: dg1__eu_activity5__* accessor aliases.
 *
 * Each macro is a pure name alias onto a reader (or __max) symbol defined
 * elsewhere in this file.  The target's name often does not describe the
 * counter being aliased (e.g. hs_em_active -> tglgt1 eu_active).
 * NOTE(review): presumably the generator shares functions whose equations
 * are identical -- confirm before changing any mapping by hand.
 */

/* EuActivity5 :: GPU Time Elapsed */
#define dg1__eu_activity5__gpu_time__read hsw__render_basic__gpu_time__read

/* EuActivity5 :: GPU Core Clocks */
#define dg1__eu_activity5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* EuActivity5 :: AVG GPU Core Frequency (__read alias) */
#define dg1__eu_activity5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* EuActivity5 :: AVG GPU Core Frequency (__max alias) */
#define dg1__eu_activity5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* EuActivity5 :: GPU Busy */
#define dg1__eu_activity5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EuActivity5 :: VS Threads Dispatched */
#define dg1__eu_activity5__vs_threads__read bdw__render_basic__vs_threads__read

/* EuActivity5 :: HS Threads Dispatched */
#define dg1__eu_activity5__hs_threads__read bdw__render_basic__hs_threads__read

/* EuActivity5 :: DS Threads Dispatched */
#define dg1__eu_activity5__ds_threads__read bdw__render_basic__ds_threads__read

/* EuActivity5 :: GS Threads Dispatched */
#define dg1__eu_activity5__gs_threads__read hsw__render_basic__vs_threads__read

/* EuActivity5 :: FS Threads Dispatched */
#define dg1__eu_activity5__ps_threads__read bdw__render_basic__ps_threads__read

/* EuActivity5 :: CS Threads Dispatched */
#define dg1__eu_activity5__cs_threads__read bdw__render_basic__cs_threads__read

/* EuActivity5 :: Render Ring Busy */
#define dg1__eu_activity5__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* EuActivity5 :: Compute Ring Busy */
#define dg1__eu_activity5__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* EuActivity5 :: Render and compute engines are simultaneously busy */
#define dg1__eu_activity5__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* EuActivity5 :: GTI Read Throughput */
#define dg1__eu_activity5__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* EuActivity5 :: GTI Write Throughput */
#define dg1__eu_activity5__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* EuActivity5 :: HS EM Pipe Active */
#define dg1__eu_activity5__hs_em_active__read tglgt1__render_basic__eu_active__read

/* EuActivity5 :: DS EM Pipe Active */
#define dg1__eu_activity5__ds_em_active__read tglgt1__render_basic__eu_stall__read

/* EuActivity5 :: HS Send Pipe Active */
#define dg1__eu_activity5__hs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * dg1 :: EuActivity6 counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * gs_threads -> hsw__render_basic__vs_threads__read); presumably
 * gen_perf.py reuses an earlier function with an identical equation --
 * confirm against the generator before changing a mapping by hand.
 */

/* GPU Time Elapsed (read) */
#define dg1__eu_activity6__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define dg1__eu_activity6__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define dg1__eu_activity6__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define dg1__eu_activity6__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define dg1__eu_activity6__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define dg1__eu_activity6__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define dg1__eu_activity6__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define dg1__eu_activity6__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define dg1__eu_activity6__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define dg1__eu_activity6__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define dg1__eu_activity6__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy (read) */
#define dg1__eu_activity6__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy (read) */
#define dg1__eu_activity6__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy (read) */
#define dg1__eu_activity6__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput (read) */
#define dg1__eu_activity6__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput (read) */
#define dg1__eu_activity6__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* GS FPU Pipe Active (read) */
#define dg1__eu_activity6__gs_fpu_active__read tglgt1__render_basic__eu_active__read

/* GS EM Pipe Active (read) */
#define dg1__eu_activity6__gs_em_active__read tglgt1__render_basic__eu_stall__read

/* GS Send Pipe Active (read) */
#define dg1__eu_activity6__gs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * dg1 :: EuActivity7 counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * fpu_active -> tglgt1__render_basic__eu_stall__read); presumably
 * gen_perf.py reuses an earlier function with an identical equation --
 * confirm against the generator before changing a mapping by hand.
 */

/* GPU Time Elapsed (read) */
#define dg1__eu_activity7__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define dg1__eu_activity7__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define dg1__eu_activity7__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define dg1__eu_activity7__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define dg1__eu_activity7__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define dg1__eu_activity7__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define dg1__eu_activity7__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define dg1__eu_activity7__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define dg1__eu_activity7__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define dg1__eu_activity7__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define dg1__eu_activity7__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU Pipe Active (read) */
#define dg1__eu_activity7__fpu_active__read tglgt1__render_basic__eu_stall__read

/* EM Pipe Active (read) */
#define dg1__eu_activity7__em_active__read tglgt1__eu_activity1__eu_send_active__read

/* EU FPU And EM Pipes Active (read) */
#define dg1__eu_activity7__eu_fpu_em_active__read tglgt1__render_basic__eu_active__read

/* EU AVG IPC Rate (read) */
#define dg1__eu_activity7__eu_avg_ipc_rate__read tglgt1__eu_activity7__eu_avg_ipc_rate__read

/* Render Ring Busy (read) */
#define dg1__eu_activity7__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy (read) */
#define dg1__eu_activity7__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy (read) */
#define dg1__eu_activity7__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput (read) */
#define dg1__eu_activity7__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput (read) */
#define dg1__eu_activity7__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/*
 * dg1 :: EuActivity8 counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * ds_send_active -> tglgt1__render_basic__eu_active__read); presumably
 * gen_perf.py reuses an earlier function with an identical equation --
 * confirm against the generator before changing a mapping by hand.
 */

/* GPU Time Elapsed (read) */
#define dg1__eu_activity8__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define dg1__eu_activity8__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define dg1__eu_activity8__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define dg1__eu_activity8__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define dg1__eu_activity8__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define dg1__eu_activity8__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define dg1__eu_activity8__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define dg1__eu_activity8__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define dg1__eu_activity8__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define dg1__eu_activity8__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define dg1__eu_activity8__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy (read) */
#define dg1__eu_activity8__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy (read) */
#define dg1__eu_activity8__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy (read) */
#define dg1__eu_activity8__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput (read) */
#define dg1__eu_activity8__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput (read) */
#define dg1__eu_activity8__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* DS Send Pipe Active (read) */
#define dg1__eu_activity8__ds_send_active__read tglgt1__render_basic__eu_active__read

/*
 * dg1 :: Metric set TestOa counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying an hsw/bdw prefix, so nothing new is defined here.
 * Each comment gives the counter's display name and the accessor kind
 * (read or max).
 *
 * NOTE(review): TestCounter0..9 point at hsw__compute_extended__* readers
 * whose names describe unrelated counters; presumably gen_perf.py reuses
 * an earlier function with an identical equation -- confirm against the
 * generator before changing a mapping by hand.
 */

/* GPU Time Elapsed (read) */
#define dg1__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define dg1__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define dg1__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define dg1__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* TestCounter0 (read) */
#define dg1__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* TestCounter1 (read) */
#define dg1__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* TestCounter2 (read) */
#define dg1__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* TestCounter3 (read) */
#define dg1__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* TestCounter4 (read) */
#define dg1__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* TestCounter5 (read) */
#define dg1__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* TestCounter6 (read) */
#define dg1__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* TestCounter7 (read) */
#define dg1__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* TestCounter8 (read) */
#define dg1__test_oa__counter8__read hsw__compute_extended__typed_writes0__read

/* TestCounter9 - OAR enable (read) */
#define dg1__test_oa__counter9__read hsw__compute_extended__untyped_writes0__read

/*
 * adl :: Render Metrics Basic set counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * shader_barriers -> hsw__render_basic__early_depth_test_fails__read);
 * presumably gen_perf.py reuses an earlier function with an identical
 * equation -- confirm against the generator before changing a mapping by
 * hand.
 */

/* GPU Time Elapsed (read) */
#define adl__render_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define adl__render_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define adl__render_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define adl__render_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* VS Threads Dispatched (read) */
#define adl__render_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define adl__render_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define adl__render_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define adl__render_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define adl__render_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define adl__render_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* GPU Busy (read) */
#define adl__render_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active (read) */
#define adl__render_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall (read) */
#define adl__render_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy (read) */
#define adl__render_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Sampler00 Busy (read) */
#define adl__render_basic__sampler00_busy__read bdw__render_basic__sampler0_busy__read

/* Sampler Slice0 Dualsubslice0 is bottleneck (read) */
#define adl__render_basic__sampler00_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Samplers Busy (read) */
#define adl__render_basic__samplers_busy__read bdw__render_basic__sampler0_busy__read

/* Samplers Bottleneck (read) */
#define adl__render_basic__sampler_bottleneck__read bdw__render_basic__sampler1_busy__read

/* Rasterized Pixels (read) */
#define adl__render_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails (read) */
#define adl__render_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails (read) */
#define adl__render_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS (read) */
#define adl__render_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests (read) */
#define adl__render_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written (read) */
#define adl__render_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended (read) */
#define adl__render_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels (read) */
#define adl__render_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses (read) */
#define adl__render_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read (read) */
#define adl__render_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written (read) */
#define adl__render_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses (read) */
#define adl__render_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses (read) */
#define adl__render_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput (read) */
#define adl__render_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages (read) */
#define adl__render_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Read Throughput (read) */
#define adl__render_basic__gti_read_throughput__read tglgt1__render_basic__gti_read_throughput__read

/* GTI Write Throughput (read) */
#define adl__render_basic__gti_write_throughput__read tglgt1__render_basic__gti_write_throughput__read

/*
 * adl :: Compute Metrics Basic counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * shader_barriers -> hsw__render_basic__early_depth_test_fails__read);
 * presumably gen_perf.py reuses an earlier function with an identical
 * equation -- confirm against the generator before changing a mapping by
 * hand.
 */

/* GPU Time Elapsed (read) */
#define adl__compute_basic__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define adl__compute_basic__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define adl__compute_basic__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define adl__compute_basic__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define adl__compute_basic__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define adl__compute_basic__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define adl__compute_basic__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define adl__compute_basic__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define adl__compute_basic__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define adl__compute_basic__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define adl__compute_basic__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active (read) */
#define adl__compute_basic__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall (read) */
#define adl__compute_basic__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy (read) */
#define adl__compute_basic__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels (read) */
#define adl__compute_basic__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails (read) */
#define adl__compute_basic__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails (read) */
#define adl__compute_basic__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS (read) */
#define adl__compute_basic__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests (read) */
#define adl__compute_basic__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written (read) */
#define adl__compute_basic__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended (read) */
#define adl__compute_basic__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels (read) */
#define adl__compute_basic__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses (read) */
#define adl__compute_basic__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read (read) */
#define adl__compute_basic__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written (read) */
#define adl__compute_basic__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses (read) */
#define adl__compute_basic__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses (read) */
#define adl__compute_basic__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput (read) */
#define adl__compute_basic__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages (read) */
#define adl__compute_basic__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* GTI Read Throughput (read) */
#define adl__compute_basic__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput (read) */
#define adl__compute_basic__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/*
 * adl :: Render Metrics set for 3D Pipeline Profile counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): the bottleneck/stall aliases point at bdw readers named
 * after *different* pipeline stages (e.g. vf_bottleneck ->
 * bdw__render_pipe_profile__bc_bottleneck__read); presumably gen_perf.py
 * reuses an earlier function with an identical equation -- confirm
 * against the generator before changing a mapping by hand.
 */

/* GPU Time Elapsed (read) */
#define adl__render_pipe_profile__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define adl__render_pipe_profile__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define adl__render_pipe_profile__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define adl__render_pipe_profile__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define adl__render_pipe_profile__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define adl__render_pipe_profile__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define adl__render_pipe_profile__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define adl__render_pipe_profile__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define adl__render_pipe_profile__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define adl__render_pipe_profile__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define adl__render_pipe_profile__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active (read) */
#define adl__render_pipe_profile__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall (read) */
#define adl__render_pipe_profile__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy (read) */
#define adl__render_pipe_profile__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels (read) */
#define adl__render_pipe_profile__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails (read) */
#define adl__render_pipe_profile__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails (read) */
#define adl__render_pipe_profile__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS (read) */
#define adl__render_pipe_profile__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests (read) */
#define adl__render_pipe_profile__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written (read) */
#define adl__render_pipe_profile__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended (read) */
#define adl__render_pipe_profile__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels (read) */
#define adl__render_pipe_profile__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses (read) */
#define adl__render_pipe_profile__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read (read) */
#define adl__render_pipe_profile__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written (read) */
#define adl__render_pipe_profile__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses (read) */
#define adl__render_pipe_profile__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses (read) */
#define adl__render_pipe_profile__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput (read) */
#define adl__render_pipe_profile__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages (read) */
#define adl__render_pipe_profile__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* VF Bottleneck (read) */
#define adl__render_pipe_profile__vf_bottleneck__read bdw__render_pipe_profile__bc_bottleneck__read

/* VS Bottleneck (read) */
#define adl__render_pipe_profile__vs_bottleneck__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* HS Bottleneck (read) */
#define adl__render_pipe_profile__hs_bottleneck__read bdw__render_pipe_profile__cl_bottleneck__read

/* DS Bottleneck (read) */
#define adl__render_pipe_profile__ds_bottleneck__read bdw__render_pipe_profile__sf_stall__read

/* GS Bottleneck (read) */
#define adl__render_pipe_profile__gs_bottleneck__read bdw__render_pipe_profile__so_bottleneck__read

/* SO Bottleneck (read) */
#define adl__render_pipe_profile__so_bottleneck__read bdw__render_pipe_profile__cl_stall__read

/* Clipper Bottleneck (read) */
#define adl__render_pipe_profile__cl_bottleneck__read bdw__render_pipe_profile__so_stall__read

/* Strip-Fans Bottleneck (read) */
#define adl__render_pipe_profile__sf_bottleneck__read bdw__render_pipe_profile__ds_stall__read

/* Hi-Depth Bottleneck (read) */
#define adl__render_pipe_profile__hi_depth_bottleneck__read bdw__render_basic__sampler1_bottleneck__read

/* BC Bottleneck (read) */
#define adl__render_pipe_profile__bc_bottleneck__read bdw__render_pipe_profile__hs_stall__read

/* HS Stall (read) */
#define adl__render_pipe_profile__hs_stall__read bdw__render_basic__sampler0_bottleneck__read

/* DS Stall (read) */
#define adl__render_pipe_profile__ds_stall__read bdw__render_pipe_profile__vf_bottleneck__read

/* SO Stall (read) */
#define adl__render_pipe_profile__so_stall__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* CL Stall (read) */
#define adl__render_pipe_profile__cl_stall__read bdw__render_basic__sampler1_busy__read

/* SF Stall (read) */
#define adl__render_pipe_profile__sf_stall__read bdw__render_pipe_profile__sf_bottleneck__read

/*
 * adl :: Metric set HDCAndSF counter accessors.
 *
 * Every macro in this section is a plain alias: it expands to the name of
 * a reader carrying another platform/metric-set prefix, so nothing new is
 * defined here.  Each comment gives the counter's display name and the
 * accessor kind (read or max).
 *
 * NOTE(review): some targets are named after unrelated counters (e.g.
 * poly_data_ready -> bdw__render_pipe_profile__sf_bottleneck__read);
 * presumably gen_perf.py reuses an earlier function with an identical
 * equation -- confirm against the generator before changing a mapping by
 * hand.
 */

/* GPU Time Elapsed (read) */
#define adl__hdc_and_sf__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks (read) */
#define adl__hdc_and_sf__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (read) */
#define adl__hdc_and_sf__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (max) */
#define adl__hdc_and_sf__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy (read) */
#define adl__hdc_and_sf__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched (read) */
#define adl__hdc_and_sf__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched (read) */
#define adl__hdc_and_sf__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched (read) */
#define adl__hdc_and_sf__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched (read) */
#define adl__hdc_and_sf__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched (read) */
#define adl__hdc_and_sf__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched (read) */
#define adl__hdc_and_sf__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active (read) */
#define adl__hdc_and_sf__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall (read) */
#define adl__hdc_and_sf__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy (read) */
#define adl__hdc_and_sf__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels (read) */
#define adl__hdc_and_sf__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails (read) */
#define adl__hdc_and_sf__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails (read) */
#define adl__hdc_and_sf__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS (read) */
#define adl__hdc_and_sf__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests (read) */
#define adl__hdc_and_sf__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written (read) */
#define adl__hdc_and_sf__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended (read) */
#define adl__hdc_and_sf__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels (read) */
#define adl__hdc_and_sf__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses (read) */
#define adl__hdc_and_sf__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read (read) */
#define adl__hdc_and_sf__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written (read) */
#define adl__hdc_and_sf__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses (read) */
#define adl__hdc_and_sf__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses (read) */
#define adl__hdc_and_sf__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput (read) */
#define adl__hdc_and_sf__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages (read) */
#define adl__hdc_and_sf__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read bdw__render_basic__sampler1_bottleneck__read

/* Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read bdw__render_pipe_profile__so_bottleneck__read

/* Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read bdw__render_pipe_profile__cl_bottleneck__read

/* Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read bdw__render_basic__sampler1_busy__read

/* Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3 (read) */
#define adl__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read bdw__render_basic__sampler0_busy__read

/* Polygon Data Ready (read) */
#define adl__hdc_and_sf__poly_data_ready__read bdw__render_pipe_profile__sf_bottleneck__read

/* Reader aliases for the RasterizerAndPixelBackend metric set.
 *
 * Each adl__rasterizer_and_pixel_backend__* name below is an object-like
 * macro that expands to the reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__rasterizer_and_pixel_backend__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__rasterizer_and_pixel_backend__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__rasterizer_and_pixel_backend__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__rasterizer_and_pixel_backend__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__rasterizer_and_pixel_backend__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__rasterizer_and_pixel_backend__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__rasterizer_and_pixel_backend__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Rasterized Pixels */
#define adl__rasterizer_and_pixel_backend__rasterized_pixels__read bdw__render_basic__rasterized_pixels__read

/* Early Hi-Depth Test Fails */
#define adl__rasterizer_and_pixel_backend__hi_depth_test_fails__read bdw__render_basic__hi_depth_test_fails__read

/* Early Depth Test Fails */
#define adl__rasterizer_and_pixel_backend__early_depth_test_fails__read bdw__render_basic__early_depth_test_fails__read

/* Samples Killed in FS */
#define adl__rasterizer_and_pixel_backend__samples_killed_in_ps__read bdw__render_basic__samples_killed_in_ps__read

/* Pixels Failing Tests */
#define adl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read bdw__render_basic__pixels_failing_post_ps_tests__read

/* Samples Written */
#define adl__rasterizer_and_pixel_backend__samples_written__read bdw__render_basic__samples_written__read

/* Samples Blended */
#define adl__rasterizer_and_pixel_backend__samples_blended__read bdw__render_basic__samples_blended__read

/* Sampler Texels */
#define adl__rasterizer_and_pixel_backend__sampler_texels__read bdw__render_basic__sampler_texels__read

/* Sampler Texels Misses */
#define adl__rasterizer_and_pixel_backend__sampler_texel_misses__read bdw__render_basic__sampler_texel_misses__read

/* SLM Bytes Read */
#define adl__rasterizer_and_pixel_backend__slm_bytes_read__read bdw__render_basic__slm_bytes_read__read

/* SLM Bytes Written */
#define adl__rasterizer_and_pixel_backend__slm_bytes_written__read bdw__render_basic__slm_bytes_written__read

/* Shader Memory Accesses */
#define adl__rasterizer_and_pixel_backend__shader_memory_accesses__read bdw__render_basic__shader_memory_accesses__read

/* Shader Atomic Memory Accesses */
#define adl__rasterizer_and_pixel_backend__shader_atomics__read bdw__render_basic__shader_atomics__read

/* L3 Shader Throughput */
#define adl__rasterizer_and_pixel_backend__l3_shader_throughput__read icl__render_basic__l3_shader_throughput__read

/* Shader Barrier Messages */
#define adl__rasterizer_and_pixel_backend__shader_barriers__read hsw__render_basic__early_depth_test_fails__read

/* Slice0 Rasterizer Input Available */
#define adl__rasterizer_and_pixel_backend__rasterizer0_input_available__read bdw__render_basic__sampler0_busy__read

/* Slice0 Rasterizer Output Ready */
#define adl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read bdw__render_pipe_profile__so_stall__read

/* Slice0 Pipe0 Post-EarlyZ Pixel Data Ready */
#define adl__rasterizer_and_pixel_backend__pixel_data00_ready__read bdw__render_basic__sampler1_busy__read

/* Slice0 Pipe0 PS Output Available */
#define adl__rasterizer_and_pixel_backend__ps_output00_available__read bdw__render_pipe_profile__ds_stall__read

/* Slice0 Pipe1 PS Output Available */
#define adl__rasterizer_and_pixel_backend__ps_output01_available__read bdw__render_pipe_profile__hs_stall__read

/* Slice0 Pipe2 PS Output Available */
#define adl__rasterizer_and_pixel_backend__ps_output02_available__read bdw__render_pipe_profile__vf_bottleneck__read

/* Slice0 Pipe0 Pixel Values Ready */
#define adl__rasterizer_and_pixel_backend__pixel_values00_ready__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* Slice0 Pipe1 Pixel Values Ready */
#define adl__rasterizer_and_pixel_backend__pixel_values01_ready__read bdw__render_pipe_profile__sf_bottleneck__read

/* Slice0 Pipe2 Pixel Values Ready */
#define adl__rasterizer_and_pixel_backend__pixel_values02_ready__read bdw__render_pipe_profile__cl_bottleneck__read

/* SQ00 is full */
#define adl__rasterizer_and_pixel_backend__gt_request_queue00_full__read bdw__render_pipe_profile__cl_stall__read

/* SQ01 is full */
#define adl__rasterizer_and_pixel_backend__gt_request_queue01_full__read bdw__render_pipe_profile__sf_stall__read

/* SQ10 is full */
#define adl__rasterizer_and_pixel_backend__gt_request_queue10_full__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* SQ11 is full */
#define adl__rasterizer_and_pixel_backend__gt_request_queue11_full__read bdw__render_pipe_profile__bc_bottleneck__read

/* Reader aliases for the L3_1 metric set.
 *
 * Each adl__l3_1__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define adl__l3_1__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_1__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_1__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* VS Threads Dispatched */
#define adl__l3_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_1__cs_threads__read bdw__render_basic__cs_threads__read

/* Slice0 L3 Bank0 Input Available */
#define adl__l3_1__l30_bank0_input_available__read tglgt2__l3_1__l30_bank0_input_available__read

/* Slice0 L3 Bank1 Input Available */
#define adl__l3_1__l30_bank1_input_available__read tglgt2__l3_1__l30_bank1_input_available__read

/* Slice0 L3 Bank4 Input Available */
#define adl__l3_1__l30_bank4_input_available__read tglgt1__l3_2__l30_bank2_input_available__read

/* Slice0 L3 Bank5 Input Available */
#define adl__l3_1__l30_bank5_input_available__read tglgt1__l3_2__l30_bank3_input_available__read

/* Reader aliases for the L3_2 metric set.
 *
 * Each adl__l3_2__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__l3_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__l3_2__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_2__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 L3 Bank2 Input Available */
#define adl__l3_2__l30_bank2_input_available__read tglgt1__l3_2__l30_bank2_input_available__read

/* Slice0 L3 Bank3 Input Available */
#define adl__l3_2__l30_bank3_input_available__read tglgt1__l3_2__l30_bank3_input_available__read

/* Slice0 L3 Bank6 Input Available */
#define adl__l3_2__l30_bank6_input_available__read tglgt2__l3_1__l30_bank1_input_available__read

/* Slice0 L3 Bank7 Input Available */
#define adl__l3_2__l30_bank7_input_available__read tglgt2__l3_1__l30_bank0_input_available__read

/* Reader aliases for the L3_3 metric set.
 *
 * Each adl__l3_3__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_3__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__l3_3__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_3__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_3__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_3__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_3__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_3__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__l3_3__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_3__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_3__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 L3 Bank0 Output Ready */
#define adl__l3_3__l30_bank0_output_ready__read tglgt1__l3_3__l30_bank0_output_ready__read

/* Slice0 L3 Bank4 Output Ready */
#define adl__l3_3__l30_bank4_output_ready__read tglgt1__l3_4__l30_bank1_output_ready__read

/* Reader aliases for the L3_4 metric set.
 *
 * Each adl__l3_4__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_4__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__l3_4__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_4__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_4__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_4__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_4__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_4__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__l3_4__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_4__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_4__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 L3 Bank1 Output Ready */
#define adl__l3_4__l30_bank1_output_ready__read tglgt1__l3_4__l30_bank1_output_ready__read

/* Slice0 L3 Bank5 Output Ready */
#define adl__l3_4__l30_bank5_output_ready__read tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the L3_5 metric set.
 *
 * Each adl__l3_5__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_5__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__l3_5__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_5__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_5__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_5__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_5__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_5__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__l3_5__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_5__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_5__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 L3 Bank2 Output Ready */
#define adl__l3_5__l30_bank2_output_ready__read tglgt1__l3_4__l30_bank1_output_ready__read

/* Slice0 L3 Bank6 Output Ready */
#define adl__l3_5__l30_bank6_output_ready__read tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the L3_6 metric set.
 *
 * Each adl__l3_6__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__l3_6__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__l3_6__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__l3_6__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__l3_6__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__l3_6__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__l3_6__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__l3_6__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__l3_6__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__l3_6__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__l3_6__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__l3_6__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__l3_6__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__l3_6__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__l3_6__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 L3 Bank3 Output Ready */
#define adl__l3_6__l30_bank3_output_ready__read tglgt1__l3_4__l30_bank1_output_ready__read

/* Slice0 L3 Bank7 Output Ready */
#define adl__l3_6__l30_bank7_output_ready__read tglgt1__l3_3__l30_bank0_output_ready__read

/* Reader aliases for the Sampler_1 metric set.
 *
 * Each adl__sampler_1__* name below is an object-like macro that expands to
 * the reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__sampler_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__sampler_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__sampler_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__sampler_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__sampler_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__sampler_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__sampler_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__sampler_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__sampler_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__sampler_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__sampler_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__sampler_1__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__sampler_1__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__sampler_1__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 DualSubslice0 Input Available */
#define adl__sampler_1__sampler00_input_available__read bdw__render_pipe_profile__cl_bottleneck__read

/* Slice0 DualSubslice1 Input Available */
#define adl__sampler_1__sampler01_input_available__read bdw__render_pipe_profile__so_bottleneck__read

/* Slice0 DualSubslice2 Input Available */
#define adl__sampler_1__sampler02_input_available__read bdw__render_basic__sampler1_bottleneck__read

/* Slice0 DualSubslice3 Input Available */
#define adl__sampler_1__sampler03_input_available__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 DualSubslice4 Input Available */
#define adl__sampler_1__sampler04_input_available__read bdw__render_basic__sampler1_busy__read

/* Slice0 DualSubslice5 Input Available */
#define adl__sampler_1__sampler05_input_available__read bdw__render_basic__sampler0_busy__read

/* Reader aliases for the Sampler_2 metric set.
 *
 * Each adl__sampler_2__* name below is an object-like macro that expands to
 * the reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__sampler_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__sampler_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__sampler_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__sampler_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__sampler_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__sampler_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__sampler_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__sampler_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__sampler_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__sampler_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__sampler_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__sampler_2__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__sampler_2__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__sampler_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Slice0 DualSubslice0 Sampler Output Ready */
#define adl__sampler_2__sampler00_output_ready__read bdw__render_pipe_profile__cl_bottleneck__read

/* Slice0 DualSubslice1 Sampler Output Ready */
#define adl__sampler_2__sampler01_output_ready__read bdw__render_pipe_profile__so_bottleneck__read

/* Slice0 DualSubslice2 Sampler Output Ready */
#define adl__sampler_2__sampler02_output_ready__read bdw__render_basic__sampler1_bottleneck__read

/* Slice0 DualSubslice3 Sampler Output Ready */
#define adl__sampler_2__sampler03_output_ready__read bdw__render_basic__sampler0_bottleneck__read

/* Slice0 DualSubslice4 Sampler Output Ready */
#define adl__sampler_2__sampler04_output_ready__read bdw__render_basic__sampler1_busy__read

/* Slice0 DualSubslice5 Sampler Output Ready */
#define adl__sampler_2__sampler05_output_ready__read bdw__render_basic__sampler0_busy__read

/* Reader aliases for the TDL_1 metric set.
 *
 * Each adl__tdl_1__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__tdl_1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__tdl_1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__tdl_1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__tdl_1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__tdl_1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__tdl_1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__tdl_1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__tdl_1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__tdl_1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__tdl_1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__tdl_1__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__tdl_1__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__tdl_1__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__tdl_1__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread03_ready_for_dispatch__read bdw__render_basic__sampler0_bottleneck__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread04_ready_for_dispatch__read bdw__render_basic__sampler1_busy__read

/* Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher */
#define adl__tdl_1__non_ps_thread05_ready_for_dispatch__read bdw__render_basic__sampler0_busy__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0 */
#define adl__tdl_1__thread_header00_ready_port0__read bdw__render_pipe_profile__vf_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1 */
#define adl__tdl_1__thread_header00_ready_port1__read bdw__render_pipe_profile__hs_stall__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2 */
#define adl__tdl_1__thread_header00_ready_port2__read bdw__render_pipe_profile__ds_stall__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3 */
#define adl__tdl_1__thread_header00_ready_port3__read bdw__render_pipe_profile__so_stall__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0 */
#define adl__tdl_1__thread_header01_ready_port0__read bdw__render_pipe_profile__cl_stall__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1 */
#define adl__tdl_1__thread_header01_ready_port1__read bdw__render_pipe_profile__sf_stall__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2 */
#define adl__tdl_1__thread_header01_ready_port2__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3 */
#define adl__tdl_1__thread_header01_ready_port3__read bdw__render_pipe_profile__bc_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher */
#define adl__tdl_1__thread_header00_ready__read bdw__render_pipe_profile__sf_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher */
#define adl__tdl_1__thread_header01_ready__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* Reader aliases for the TDL_2 metric set.
 *
 * Each adl__tdl_2__* name below is an object-like macro that expands to the
 * reader function named on the same line.
 */

/* GPU Time Elapsed */
#define adl__tdl_2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__tdl_2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency (__read) */
#define adl__tdl_2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max) */
#define adl__tdl_2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__tdl_2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__tdl_2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__tdl_2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__tdl_2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__tdl_2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__tdl_2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__tdl_2__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__tdl_2__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__tdl_2__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__tdl_2__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher */
#define adl__tdl_2__ps_thread00_ready_for_dispatch__read bdw__render_pipe_profile__cl_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher */
#define adl__tdl_2__ps_thread01_ready_for_dispatch__read bdw__render_pipe_profile__so_bottleneck__read

/* PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher */
#define adl__tdl_2__ps_thread02_ready_for_dispatch__read bdw__render_basic__sampler1_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher */
#define adl__tdl_2__ps_thread03_ready_for_dispatch__read \
   bdw__render_basic__sampler0_bottleneck__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher */
#define adl__tdl_2__ps_thread04_ready_for_dispatch__read \
   bdw__render_basic__sampler1_busy__read

/* TDL_2 :: PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher */
#define adl__tdl_2__ps_thread05_ready_for_dispatch__read \
   bdw__render_basic__sampler0_busy__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher */
#define adl__tdl_2__thread_header05_ready__read \
   bdw__render_pipe_profile__sf_bottleneck__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0 */
#define adl__tdl_2__thread_header05_ready_port0__read \
   bdw__render_pipe_profile__cl_stall__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1 */
#define adl__tdl_2__thread_header05_ready_port1__read \
   bdw__render_pipe_profile__sf_stall__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2 */
#define adl__tdl_2__thread_header05_ready_port2__read \
   bdw__render_pipe_profile__hi_depth_bottleneck__read

/* TDL_2 :: Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3 */
#define adl__tdl_2__thread_header05_ready_port3__read \
   bdw__render_pipe_profile__bc_bottleneck__read

/*
 * "TDL_3" metric set -- accessor names with the adl__tdl_3__ prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__tdl_3__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__tdl_3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__tdl_3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__tdl_3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__tdl_3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__tdl_3__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__tdl_3__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__tdl_3__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__tdl_3__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__tdl_3__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__tdl_3__cs_threads__read bdw__render_basic__cs_threads__read

/* EU Active */
#define adl__tdl_3__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__tdl_3__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__tdl_3__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher */
#define adl__tdl_3__thread_header02_ready__read bdw__render_basic__sampler0_busy__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher */
#define adl__tdl_3__thread_header03_ready__read bdw__render_basic__sampler1_busy__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher */
#define adl__tdl_3__thread_header04_ready__read bdw__render_basic__sampler0_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0 */
#define adl__tdl_3__thread_header02_ready_port0__read bdw__render_pipe_profile__bc_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1 */
#define adl__tdl_3__thread_header02_ready_port1__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2 */
#define adl__tdl_3__thread_header02_ready_port2__read bdw__render_pipe_profile__sf_stall__read

/* Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3 */
#define adl__tdl_3__thread_header02_ready_port3__read bdw__render_pipe_profile__cl_stall__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0 */
#define adl__tdl_3__thread_header03_ready_port0__read bdw__render_pipe_profile__so_stall__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1 */
#define adl__tdl_3__thread_header03_ready_port1__read bdw__render_pipe_profile__ds_stall__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2 */
#define adl__tdl_3__thread_header03_ready_port2__read bdw__render_pipe_profile__hs_stall__read

/* Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3 */
#define adl__tdl_3__thread_header03_ready_port3__read bdw__render_pipe_profile__vf_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0 */
#define adl__tdl_3__thread_header04_ready_port0__read bdw__render_pipe_profile__early_depth_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1 */
#define adl__tdl_3__thread_header04_ready_port1__read bdw__render_pipe_profile__sf_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2 */
#define adl__tdl_3__thread_header04_ready_port2__read bdw__render_pipe_profile__cl_bottleneck__read

/* Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3 */
#define adl__tdl_3__thread_header04_ready_port3__read bdw__render_pipe_profile__so_bottleneck__read

/*
 * "GpuBusyness" metric set -- accessor names with the adl__gpu_busyness__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__gpu_busyness__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__gpu_busyness__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__gpu_busyness__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__gpu_busyness__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__gpu_busyness__gpu_busy__read bdw__render_basic__gpu_busy__read

/* EU Active */
#define adl__gpu_busyness__eu_active__read tglgt1__render_basic__eu_active__read

/* EU Stall */
#define adl__gpu_busyness__eu_stall__read tglgt1__render_basic__eu_stall__read

/* EU Thread Occupancy */
#define adl__gpu_busyness__eu_thread_occupancy__read tglgt1__render_basic__eu_thread_occupancy__read

/* VS Threads Dispatched */
#define adl__gpu_busyness__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__gpu_busyness__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__gpu_busyness__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__gpu_busyness__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__gpu_busyness__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__gpu_busyness__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__gpu_busyness__render_busy__read bdw__render_pipe_profile__hi_depth_bottleneck__read

/* Compute Ring Busy */
#define adl__gpu_busyness__compute_busy__read bdw__render_pipe_profile__so_stall__read

/* Posh Ring Busy */
#define adl__gpu_busyness__posh_engine_busy__read bdw__render_pipe_profile__sf_stall__read

/* Blitter Ring Busy */
#define adl__gpu_busyness__blitter_busy__read bdw__render_pipe_profile__cl_stall__read

/* Vebox Ring Busy */
#define adl__gpu_busyness__vebox_busy__read bdw__render_pipe_profile__bc_bottleneck__read

/* Vdbox0 Ring Busy */
#define adl__gpu_busyness__vdbox0_busy__read bdw__render_pipe_profile__ds_stall__read

/* Vdbox1 Ring Busy */
#define adl__gpu_busyness__vdbox1_busy__read bdw__render_pipe_profile__hs_stall__read

/* Render and compute engines are simultaneously busy */
#define adl__gpu_busyness__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* Any Engine Busy */
#define adl__gpu_busyness__any_engine_busy__read bdw__render_basic__sampler1_busy__read

/*
 * "EuActivity1" metric set -- accessor names with the adl__eu_activity1__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity1__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity1__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity1__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity1__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity1__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity1__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity1__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity1__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity1__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity1__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity1__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity1__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity1__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity1__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity1__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity1__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* VS FPU Pipe Active */
#define adl__eu_activity1__vs_fpu_active__read tglgt1__render_basic__eu_active__read

/* PS FPU Pipe Active */
#define adl__eu_activity1__ps_fpu_active__read tglgt1__render_basic__eu_stall__read

/* EU Send Pipe Active */
#define adl__eu_activity1__eu_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * "EuActivity2" metric set -- accessor names with the adl__eu_activity2__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity2__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity2__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity2__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity2__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity2__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity2__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity2__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity2__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity2__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity2__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity2__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity2__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity2__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity2__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity2__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity2__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* CS EM Pipe Active */
#define adl__eu_activity2__cs_em_active__read tglgt1__eu_activity1__eu_send_active__read

/* CS FPU Pipe Active */
#define adl__eu_activity2__cs_fpu_active__read tglgt1__render_basic__eu_stall__read

/* CS Send Pipeline Active */
#define adl__eu_activity2__cs_send_active__read tglgt1__render_basic__eu_active__read

/*
 * "EuActivity3" metric set -- accessor names with the adl__eu_activity3__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity3__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity3__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity3__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity3__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity3__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity3__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity3__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity3__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity3__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity3__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity3__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity3__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity3__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity3__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity3__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity3__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* VS EM Pipe Active */
#define adl__eu_activity3__vs_em_active__read tglgt1__render_basic__eu_active__read

/* PS EM Pipe Active */
#define adl__eu_activity3__ps_em_active__read tglgt1__render_basic__eu_stall__read

/* PS Send Pipeline Active */
#define adl__eu_activity3__ps_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * "EuActivity4" metric set -- accessor names with the adl__eu_activity4__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity4__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity4__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity4__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity4__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity4__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity4__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity4__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity4__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity4__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity4__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity4__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity4__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity4__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity4__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity4__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity4__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* HS FPU Pipe Active */
#define adl__eu_activity4__hs_fpu_active__read tglgt1__render_basic__eu_active__read

/* DS FPU Pipe Active */
#define adl__eu_activity4__ds_fpu_active__read tglgt1__render_basic__eu_stall__read

/* VS Send Pipe Active */
#define adl__eu_activity4__vs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * "EuActivity5" metric set -- accessor names with the adl__eu_activity5__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity5__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity5__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity5__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity5__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity5__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity5__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity5__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity5__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity5__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity5__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity5__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity5__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity5__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity5__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity5__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity5__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* HS EM Pipe Active */
#define adl__eu_activity5__hs_em_active__read tglgt1__render_basic__eu_active__read

/* DS EM Pipe Active */
#define adl__eu_activity5__ds_em_active__read tglgt1__render_basic__eu_stall__read

/* HS Send Pipe Active */
#define adl__eu_activity5__hs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * "EuActivity6" metric set -- accessor names with the adl__eu_activity6__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity6__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity6__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity6__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity6__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity6__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity6__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity6__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity6__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity6__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity6__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity6__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity6__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity6__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity6__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity6__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity6__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* GS FPU Pipe Active */
#define adl__eu_activity6__gs_fpu_active__read tglgt1__render_basic__eu_active__read

/* GS EM Pipe Active */
#define adl__eu_activity6__gs_em_active__read tglgt1__render_basic__eu_stall__read

/* GS Send Pipe Active */
#define adl__eu_activity6__gs_send_active__read tglgt1__eu_activity1__eu_send_active__read

/*
 * "EuActivity7" metric set -- accessor names with the adl__eu_activity7__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity7__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity7__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity7__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity7__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity7__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity7__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity7__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity7__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity7__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity7__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity7__cs_threads__read bdw__render_basic__cs_threads__read

/* EU FPU Pipe Active */
#define adl__eu_activity7__fpu_active__read tglgt1__render_basic__eu_stall__read

/* EM Pipe Active */
#define adl__eu_activity7__em_active__read tglgt1__eu_activity1__eu_send_active__read

/* EU FPU And EM Pipes Active */
#define adl__eu_activity7__eu_fpu_em_active__read tglgt1__render_basic__eu_active__read

/* EU AVG IPC Rate */
#define adl__eu_activity7__eu_avg_ipc_rate__read tglgt1__eu_activity7__eu_avg_ipc_rate__read

/* Render Ring Busy */
#define adl__eu_activity7__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity7__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity7__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity7__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity7__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/*
 * "EuActivity8" metric set -- accessor names with the adl__eu_activity8__
 * prefix.
 *
 * Nothing in this section defines a function: every entry is an object-like
 * macro that forwards to an accessor named under a different prefix (hsw__,
 * bdw__ or tglgt1__).  The comment above each alias names the counter it
 * serves; the target's own name may describe an unrelated counter.
 * NOTE(review): presumably gen_perf.py emits an alias whenever a counter's
 * equation matches an accessor it has already generated -- confirm there.
 */

/* GPU Time Elapsed */
#define adl__eu_activity8__gpu_time__read hsw__render_basic__gpu_time__read

/* GPU Core Clocks */
#define adl__eu_activity8__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* AVG GPU Core Frequency */
#define adl__eu_activity8__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* AVG GPU Core Frequency (__max companion of the alias above) */
#define adl__eu_activity8__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/* GPU Busy */
#define adl__eu_activity8__gpu_busy__read bdw__render_basic__gpu_busy__read

/* VS Threads Dispatched */
#define adl__eu_activity8__vs_threads__read bdw__render_basic__vs_threads__read

/* HS Threads Dispatched */
#define adl__eu_activity8__hs_threads__read bdw__render_basic__hs_threads__read

/* DS Threads Dispatched */
#define adl__eu_activity8__ds_threads__read bdw__render_basic__ds_threads__read

/* GS Threads Dispatched */
#define adl__eu_activity8__gs_threads__read hsw__render_basic__vs_threads__read

/* FS Threads Dispatched */
#define adl__eu_activity8__ps_threads__read bdw__render_basic__ps_threads__read

/* CS Threads Dispatched */
#define adl__eu_activity8__cs_threads__read bdw__render_basic__cs_threads__read

/* Render Ring Busy */
#define adl__eu_activity8__render_busy__read bdw__render_pipe_profile__hs_stall__read

/* Compute Ring Busy */
#define adl__eu_activity8__compute_busy__read bdw__render_pipe_profile__vf_bottleneck__read

/* Render and compute engines are simultaneously busy */
#define adl__eu_activity8__render_and_compute_busy__read bdw__render_basic__sampler0_busy__read

/* GTI Read Throughput */
#define adl__eu_activity8__gti_read_throughput__read tglgt1__compute_basic__gti_read_throughput__read

/* GTI Write Throughput */
#define adl__eu_activity8__gti_write_throughput__read tglgt1__compute_basic__gti_write_throughput__read

/* DS Send Pipe Active */
#define adl__eu_activity8__ds_send_active__read tglgt1__render_basic__eu_active__read

/*
 * TestOa metric set: GPU time, core clock and core frequency readers.
 *
 * These macros forward the ADL names to the HSW/BDW RenderBasic symbols of
 * the same counter.
 */

/* Metric set TestOa :: GPU Time Elapsed */
#define adl__test_oa__gpu_time__read hsw__render_basic__gpu_time__read

/* Metric set TestOa :: GPU Core Clocks */
#define adl__test_oa__gpu_core_clocks__read bdw__render_basic__gpu_core_clocks__read

/* Metric set TestOa :: AVG GPU Core Frequency (value reader) */
#define adl__test_oa__avg_gpu_core_frequency__read bdw__render_basic__avg_gpu_core_frequency__read

/* Metric set TestOa :: AVG GPU Core Frequency (the __max companion, i.e.
 * the helper whose result is stored in a counter's raw_max) */
#define adl__test_oa__avg_gpu_core_frequency__max hsw__render_basic__avg_gpu_core_frequency__max

/*
 * TestOa metric set: TestCounter0 .. TestCounter9 readers.
 *
 * Each test counter is a macro alias onto a reader carrying an
 * hsw__compute_extended__ name.
 * NOTE(review): the target names (untyped reads, typed writes, ...) do not
 * describe the test counters themselves; presumably only the underlying
 * read equation is shared -- confirm against the generator.
 */

/* Metric set TestOa :: TestCounter0 */
#define adl__test_oa__counter0__read hsw__compute_extended__eu_untyped_reads0__read

/* Metric set TestOa :: TestCounter1 */
#define adl__test_oa__counter1__read hsw__compute_extended__eu_untyped_writes0__read

/* Metric set TestOa :: TestCounter2 */
#define adl__test_oa__counter2__read hsw__compute_extended__eu_typed_reads0__read

/* Metric set TestOa :: TestCounter3 */
#define adl__test_oa__counter3__read hsw__compute_extended__eu_typed_writes0__read

/* Metric set TestOa :: TestCounter4 */
#define adl__test_oa__counter4__read hsw__compute_extended__eu_untyped_atomics0__read

/* Metric set TestOa :: TestCounter5 */
#define adl__test_oa__counter5__read hsw__compute_extended__eu_typed_atomics0__read

/* Metric set TestOa :: TestCounter6 */
#define adl__test_oa__counter6__read hsw__compute_extended__eu_urb_atomics0__read

/* Metric set TestOa :: TestCounter7 */
#define adl__test_oa__counter7__read hsw__compute_extended__gpu_clocks__read

/* Metric set TestOa :: TestCounter8 */
#define adl__test_oa__counter8__read hsw__compute_extended__typed_writes0__read

/* Metric set TestOa :: TestCounter9 - OAR enable */
#define adl__test_oa__counter9__read hsw__compute_extended__untyped_writes0__read


static void
hsw_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "a490e9d2-55b3-4db0-8dab-53011032c5f3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 70);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000253A4, .val = 0x01600000 },
         { .reg = 0x00025440, .val = 0x00100000 },
         { .reg = 0x00025128, .val = 0x00000000 },
         { .reg = 0x0002691C, .val = 0x00000800 },
         { .reg = 0x00026AA0, .val = 0x01500000 },
         { .reg = 0x00026B9C, .val = 0x00006000 },
         { .reg = 0x0002791C, .val = 0x00000800 },
         { .reg = 0x00027AA0, .val = 0x01500000 },
         { .reg = 0x00027B9C, .val = 0x00006000 },
         { .reg = 0x0002641C, .val = 0x00000400 },
         { .reg = 0x00025380, .val = 0x00000010 },
         { .reg = 0x0002538C, .val = 0x00000000 },
         { .reg = 0x00025384, .val = 0x0800AAAA },
         { .reg = 0x00025400, .val = 0x00000004 },
         { .reg = 0x0002540C, .val = 0x06029000 },
         { .reg = 0x00025410, .val = 0x00000002 },
         { .reg = 0x00025404, .val = 0x5C30FFFF },
         { .reg = 0x00025100, .val = 0x00000016 },
         { .reg = 0x00025110, .val = 0x00000400 },
         { .reg = 0x00025104, .val = 0x00000000 },
         { .reg = 0x00026804, .val = 0x00001211 },
         { .reg = 0x00026884, .val = 0x00000100 },
         { .reg = 0x00026900, .val = 0x00000002 },
         { .reg = 0x00026908, .val = 0x00700000 },
         { .reg = 0x00026904, .val = 0x00000000 },
         { .reg = 0x00026984, .val = 0x00001022 },
         { .reg = 0x00026A04, .val = 0x00000011 },
         { .reg = 0x00026A80, .val = 0x00000006 },
         { .reg = 0x00026A88, .val = 0x00000C02 },
         { .reg = 0x00026A84, .val = 0x00000000 },
         { .reg = 0x00026B04, .val = 0x00001000 },
         { .reg = 0x00026B80, .val = 0x00000002 },
         { .reg = 0x00026B8C, .val = 0x00000007 },
         { .reg = 0x00026B84, .val = 0x00000000 },
         { .reg = 0x00027804, .val = 0x00004844 },
         { .reg = 0x00027884, .val = 0x00000400 },
         { .reg = 0x00027900, .val = 0x00000002 },
         { .reg = 0x00027908, .val = 0x0E000000 },
         { .reg = 0x00027904, .val = 0x00000000 },
         { .reg = 0x00027984, .val = 0x00004088 },
         { .reg = 0x00027A04, .val = 0x00000044 },
         { .reg = 0x00027A80, .val = 0x00000006 },
         { .reg = 0x00027A88, .val = 0x00018040 },
         { .reg = 0x00027A84, .val = 0x00000000 },
         { .reg = 0x00027B04, .val = 0x00004000 },
         { .reg = 0x00027B80, .val = 0x00000002 },
         { .reg = 0x00027B8C, .val = 0x000000E0 },
         { .reg = 0x00027B84, .val = 0x00000000 },
         { .reg = 0x00026104, .val = 0x00002222 },
         { .reg = 0x00026184, .val = 0x0C006666 },
         { .reg = 0x00026284, .val = 0x04000000 },
         { .reg = 0x00026304, .val = 0x04000000 },
         { .reg = 0x00026400, .val = 0x00000002 },
         { .reg = 0x00026410, .val = 0x000000A0 },
         { .reg = 0x00026404, .val = 0x00000000 },
         { .reg = 0x00025420, .val = 0x04108020 },
         { .reg = 0x00025424, .val = 0x1284A420 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x00042049 },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = hsw__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuActive";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuActivePerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuStallPerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuStall";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuActive";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuActivePerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuStallPerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuStall";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuActivePerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuStallPerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuStall";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuActive";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 180;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuActivePerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuStallPerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuStall";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 204;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which sampler 0 was busy. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 228;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which sampler 1 was busy. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 232;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers were busy. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which sampler 0 was bottlenecks. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = hsw__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which sampler 1 was bottlenecks. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__sampler0_texels__read;
         counter->name = "Sampler 0 Texels LOD0";
         counter->desc = "The total number of texels lookups in LOD0 in sampler 0 unit. Unit: texels.";
         counter->symbol_name = "Sampler0Texels";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__sampler1_texels__read;
         counter->name = "Sampler 1 Texels LOD0";
         counter->desc = "The total number of texels lookups in LOD0 in sampler 1 unit. Unit: texels.";
         counter->symbol_name = "Sampler1Texels";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 256;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels LOD0";
      counter->desc = "The total number of texels lookups in LOD0 in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test. Unit: pixels.";
      counter->symbol_name = "AlphaTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test. Unit: pixels.";
      counter->symbol_name = "PostPsStencilTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test. Unit: pixels.";
      counter->symbol_name = "PostPsDepthTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 376;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 384;

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 392;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 400;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__render_basic__llc_gpu_throughput__read;
         counter->name = "LLC GPU Throughput";
         counter->desc = "The total number of GPU memory bytes transferred between GPU and LLC. Unit: bytes.";
         counter->symbol_name = "LlcGpuThroughput";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
         counter->raw_max = 0 /* unsupported (varies over time) */;
         counter->offset = 408;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ps_duration__read;
      counter->name = "FS Duration";
      counter->desc = "Total Fragment Shader GPU duration. Unit: us.";
      counter->symbol_name = "PsDuration";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 416;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__vs_duration__read;
      counter->name = "VS Duration";
      counter->desc = "Total Vertex Shader GPU duration. Unit: us.";
      counter->symbol_name = "VsDuration";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 424;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__gs_duration__read;
      counter->name = "GS Duration";
      counter->desc = "Total Geometry Shader GPU duration. Unit: us.";
      counter->symbol_name = "GsDuration";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 432;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__ds_duration__read;
      counter->name = "TES Duration";
      counter->desc = "Total Evaluation Shader GPU duration. Unit: us.";
      counter->symbol_name = "DsDuration";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 440;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__hs_duration__read;
      counter->name = "TCS Duration";
      counter->desc = "Total Control Shader GPU duration. Unit: us.";
      counter->symbol_name = "HsDuration";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 448;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__render_basic__cs_duration__read;
      counter->name = "CS Duration";
      counter->desc = "Total Compute Shader GPU duration. Unit: us.";
      counter->symbol_name = "CsDuration";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 456;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__sampler_bottleneck__read;
      counter->name = "Sampler Bottleneck";
      counter->desc = "The percentage of time in which samplers were bottlenecks. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 464;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__render_basic__eu_idle__read;
      counter->name = "EU Idle";
      counter->desc = "The percentage of time in which the Execution Units were idle. Unit: percent.";
      counter->symbol_name = "EuIdle";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 468;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
hsw_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "b344c8cb-a291-4cbf-aa9c-b40213bfc96f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000253A4, .val = 0x00000000 },
         { .reg = 0x0002681C, .val = 0x01F00800 },
         { .reg = 0x00026820, .val = 0x00001000 },
         { .reg = 0x0002781C, .val = 0x01F00800 },
         { .reg = 0x00026520, .val = 0x00000007 },
         { .reg = 0x000265A0, .val = 0x00000007 },
         { .reg = 0x00025380, .val = 0x00000010 },
         { .reg = 0x0002538C, .val = 0x00300000 },
         { .reg = 0x00025384, .val = 0xAA8AAAAA },
         { .reg = 0x00025404, .val = 0xFFFFFFFF },
         { .reg = 0x00026800, .val = 0x00004202 },
         { .reg = 0x00026808, .val = 0x00605817 },
         { .reg = 0x0002680C, .val = 0x10001005 },
         { .reg = 0x00026804, .val = 0x00000000 },
         { .reg = 0x00027800, .val = 0x00000102 },
         { .reg = 0x00027808, .val = 0x0C0701E0 },
         { .reg = 0x0002780C, .val = 0x000200A0 },
         { .reg = 0x00027804, .val = 0x00000000 },
         { .reg = 0x00026484, .val = 0x44000000 },
         { .reg = 0x00026704, .val = 0x44000000 },
         { .reg = 0x00026500, .val = 0x00000006 },
         { .reg = 0x00026510, .val = 0x00000001 },
         { .reg = 0x00026504, .val = 0x88000000 },
         { .reg = 0x00026580, .val = 0x00000006 },
         { .reg = 0x00026590, .val = 0x00000020 },
         { .reg = 0x00026584, .val = 0x00000000 },
         { .reg = 0x00026104, .val = 0x55822222 },
         { .reg = 0x00026184, .val = 0xAA866666 },
         { .reg = 0x00025420, .val = 0x08320C83 },
         { .reg = 0x00025424, .val = 0x06820C83 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x00000C03 },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002718, .val = 0xAAAAAAAA },
         { .reg = 0x0000271C, .val = 0xAAAAAAAA },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002728, .val = 0xAAAAAAAA },
         { .reg = 0x0000272C, .val = 0xAAAAAAAA },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00000000 },
         { .reg = 0x00002748, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x00000000 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002754, .val = 0x00000000 },
         { .reg = 0x00002758, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = hsw__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuActive";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuActivePerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuStall";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuStallPerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuActive";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuActivePerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuStall";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuStallPerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuActivePerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuStall";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuStallPerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuActive";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuActivePerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuStall";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuStallPerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_basic__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test. Unit: pixels.";
      counter->symbol_name = "AlphaTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test. Unit: pixels.";
      counter->symbol_name = "PostPsStencilTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test. Unit: pixels.";
      counter->symbol_name = "PostPsDepthTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__typed_atomics__read;
      counter->name = "Typed Atomics";
      counter->desc = "The total number of typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of byten written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 376;

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__compute_basic__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 384;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__compute_basic__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 392;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
hsw_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "480f9795-cf6a-4204-a9e3-cd7015515f8d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we assume the definition of a query cannot vary between
    * contexts, so it is OK to describe a query within a global variable
    * that only needs to be initialized once. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x0002681C, .val = 0x3EB00800 },
         { .reg = 0x00026820, .val = 0x00900000 },
         { .reg = 0x00025384, .val = 0x02AAAAAA },
         { .reg = 0x00025404, .val = 0x03FFFFFF },
         { .reg = 0x00026800, .val = 0x00142284 },
         { .reg = 0x00026808, .val = 0x0E629062 },
         { .reg = 0x0002680C, .val = 0x3F6F55CB },
         { .reg = 0x00026810, .val = 0x00000014 },
         { .reg = 0x00026804, .val = 0x00000000 },
         { .reg = 0x00026104, .val = 0x02AAAAAA },
         { .reg = 0x00026184, .val = 0x02AAAAAA },
         { .reg = 0x00025420, .val = 0x00000000 },
         { .reg = 0x00025424, .val = 0x00000000 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x00000000 },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FE2A },
         { .reg = 0x00002774, .val = 0x0000FF00 },
         { .reg = 0x00002778, .val = 0x0007FE6A },
         { .reg = 0x0000277C, .val = 0x0000FF00 },
         { .reg = 0x00002780, .val = 0x0007FE92 },
         { .reg = 0x00002784, .val = 0x0000FF00 },
         { .reg = 0x00002788, .val = 0x0007FEA2 },
         { .reg = 0x0000278C, .val = 0x0000FF00 },
         { .reg = 0x00002790, .val = 0x0007FE32 },
         { .reg = 0x00002794, .val = 0x0000FF00 },
         { .reg = 0x00002798, .val = 0x0007FE9A },
         { .reg = 0x0000279C, .val = 0x0000FF00 },
         { .reg = 0x000027A0, .val = 0x0007FF23 },
         { .reg = 0x000027A4, .val = 0x0000FF00 },
         { .reg = 0x000027A8, .val = 0x0007FFF3 },
         { .reg = 0x000027AC, .val = 0x0000FFFE },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__gpu_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__eu_urb_atomics0__read;
      counter->name = "EuUrbAtomics0";
      counter->desc = "The subslice 0 EU URB Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUrbAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__compute_extended__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 144;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__compute_extended__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 152;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
hsw_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution set";
   query->symbol_name = "MemoryReads";
   query->guid = "399d3001-97d6-4240-b065-4fb843138e17";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 56);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000253A4, .val = 0x34300000 },
         { .reg = 0x00025440, .val = 0x2D800000 },
         { .reg = 0x00025444, .val = 0x00000008 },
         { .reg = 0x00025128, .val = 0x0E600000 },
         { .reg = 0x00025380, .val = 0x00000450 },
         { .reg = 0x00025390, .val = 0x00052C43 },
         { .reg = 0x00025384, .val = 0x00000000 },
         { .reg = 0x00025400, .val = 0x00006144 },
         { .reg = 0x00025408, .val = 0x0A418820 },
         { .reg = 0x0002540C, .val = 0x000820E6 },
         { .reg = 0x00025404, .val = 0xFF500000 },
         { .reg = 0x00025100, .val = 0x000005D6 },
         { .reg = 0x0002510C, .val = 0x0EF00000 },
         { .reg = 0x00025104, .val = 0x00000000 },
         { .reg = 0x00025420, .val = 0x02108421 },
         { .reg = 0x00025424, .val = 0x00008421 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x00000000 },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x76543298 },
         { .reg = 0x00002748, .val = 0x98989898 },
         { .reg = 0x00002744, .val = 0x000000E4 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x98A98A98 },
         { .reg = 0x00002758, .val = 0x88888888 },
         { .reg = 0x00002754, .val = 0x000C5500 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FC00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FC00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FC00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FC00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FC00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FC00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FC00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FC00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = hsw__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuActive";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuActivePerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuStall";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuStallPerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuActive";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuActivePerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuStall";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuStallPerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuActivePerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuStall";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuStallPerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuActive";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuActivePerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuStall";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuStallPerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_reads__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test. Unit: pixels.";
      counter->symbol_name = "AlphaTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test. Unit: pixels.";
      counter->symbol_name = "PostPsStencilTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test. Unit: pixels.";
      counter->symbol_name = "PostPsDepthTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_hi_depth_memory_reads__read;
      counter->name = "GtiHiDepthMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHiDepthMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 376;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 384;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 392;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 400;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads (64B each). Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 408;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_reads__llc_read_accesses__read;
      counter->name = "LLC GPU Read Accesses";
      counter->desc = "The total number of LLC cache lookups for reads done from the GPU. Unit: messages.";
      counter->symbol_name = "LlcReadAccesses";
      counter->category = "LLC";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 416;

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__memory_reads__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 424;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__memory_reads__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 432;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
hsw_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution set";
   query->symbol_name = "MemoryWrites";
   query->guid = "f3c1ff4b-d0da-4ffa-8780-2c6b98f3f2d5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 56);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000253A4, .val = 0x34300000 },
         { .reg = 0x00025440, .val = 0x01500000 },
         { .reg = 0x00025444, .val = 0x00000120 },
         { .reg = 0x00025128, .val = 0x0C200000 },
         { .reg = 0x00025380, .val = 0x00000450 },
         { .reg = 0x00025390, .val = 0x00052C43 },
         { .reg = 0x00025384, .val = 0x00000000 },
         { .reg = 0x00025400, .val = 0x00007184 },
         { .reg = 0x00025408, .val = 0x0A418820 },
         { .reg = 0x0002540C, .val = 0x000820E6 },
         { .reg = 0x00025404, .val = 0xFF500000 },
         { .reg = 0x00025100, .val = 0x000005D6 },
         { .reg = 0x0002510C, .val = 0x1E700000 },
         { .reg = 0x00025104, .val = 0x00000000 },
         { .reg = 0x00025420, .val = 0x02108421 },
         { .reg = 0x00025424, .val = 0x00008421 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x00000000 },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x76543298 },
         { .reg = 0x00002748, .val = 0x98989898 },
         { .reg = 0x00002744, .val = 0x000000E4 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0xBABABABA },
         { .reg = 0x00002758, .val = 0x88888888 },
         { .reg = 0x00002754, .val = 0x000C5500 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FC00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FC00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FC00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FC00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FC00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FC00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FC00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FC00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = hsw__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuActive";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuActivePerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuStall";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuStallPerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuActive";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuActivePerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuStall";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuStallPerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuActivePerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuStall";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuStallPerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuActive";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuActivePerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuStall";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuStallPerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__memory_writes__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test. Unit: pixels.";
      counter->symbol_name = "AlphaTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test. Unit: pixels.";
      counter->symbol_name = "PostPsStencilTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test. Unit: pixels.";
      counter->symbol_name = "PostPsDepthTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 376;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 384;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 392;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes (64B each). Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 400;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__memory_writes__llc_wr_accesses__read;
      counter->name = "LLC GPU Write Accesses";
      counter->desc = "The total number of LLC cache lookups for write done from the GPU (32B writes). Unit: messages.";
      counter->symbol_name = "LlcWrAccesses";
      counter->category = "LLC";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 408;

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__memory_writes__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 416;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__memory_writes__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 424;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__memory_writes__llc_gpu_throughput__read;
         counter->name = "LLC GPU Throughput";
         counter->desc = "The total number of GPU memory bytes transferred between GPU and LLC. Unit: bytes.";
         counter->symbol_name = "LlcGpuThroughput";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
         counter->raw_max = 0 /* unsupported (varies over time) */;
         counter->offset = 432;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
hsw_register_sampler_balance_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set SamplerBalance";
   query->symbol_name = "SamplerBalance";
   query->guid = "e111cda4-19c3-41ee-b326-f99ac44ebf78";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 57);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A45_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->a_offset = query->gpu_time_offset + 1;
   query->b_offset = query->a_offset + 45;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x0002EB9C, .val = 0x01906400 },
         { .reg = 0x0002FB9C, .val = 0x01906400 },
         { .reg = 0x000253A4, .val = 0x00000000 },
         { .reg = 0x00026B9C, .val = 0x01906400 },
         { .reg = 0x00027B9C, .val = 0x01906400 },
         { .reg = 0x00027104, .val = 0x00A00000 },
         { .reg = 0x00027184, .val = 0x00A50000 },
         { .reg = 0x0002E804, .val = 0x00500000 },
         { .reg = 0x0002E984, .val = 0x00500000 },
         { .reg = 0x0002EB04, .val = 0x00500000 },
         { .reg = 0x0002EB80, .val = 0x00000084 },
         { .reg = 0x0002EB8C, .val = 0x14200000 },
         { .reg = 0x0002EB84, .val = 0x00000000 },
         { .reg = 0x0002F804, .val = 0x00050000 },
         { .reg = 0x0002F984, .val = 0x00050000 },
         { .reg = 0x0002FB04, .val = 0x00050000 },
         { .reg = 0x0002FB80, .val = 0x00000084 },
         { .reg = 0x0002FB8C, .val = 0x00050800 },
         { .reg = 0x0002FB84, .val = 0x00000000 },
         { .reg = 0x00025380, .val = 0x00000010 },
         { .reg = 0x0002538C, .val = 0x000000C0 },
         { .reg = 0x00025384, .val = 0xAA550000 },
         { .reg = 0x00025404, .val = 0xFFFFC000 },
         { .reg = 0x00026804, .val = 0x50000000 },
         { .reg = 0x00026984, .val = 0x50000000 },
         { .reg = 0x00026B04, .val = 0x50000000 },
         { .reg = 0x00026B80, .val = 0x00000084 },
         { .reg = 0x00026B90, .val = 0x00050800 },
         { .reg = 0x00026B84, .val = 0x00000000 },
         { .reg = 0x00027804, .val = 0x05000000 },
         { .reg = 0x00027984, .val = 0x05000000 },
         { .reg = 0x00027B04, .val = 0x05000000 },
         { .reg = 0x00027B80, .val = 0x00000084 },
         { .reg = 0x00027B90, .val = 0x00000142 },
         { .reg = 0x00027B84, .val = 0x00000000 },
         { .reg = 0x00026104, .val = 0xA0000000 },
         { .reg = 0x00026184, .val = 0xA5000000 },
         { .reg = 0x00025424, .val = 0x00008620 },
         { .reg = 0x0002541C, .val = 0x00000000 },
         { .reg = 0x00025428, .val = 0x0004A54A },
         { .reg = 0x000091BC, .val = 0xE0400000 },
         { .reg = 0x000091C4, .val = 0xE4500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = hsw__sampler_balance__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has being processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_threads__read;
      counter->name = "TCS Threads Dispatched";
      counter->desc = "The total number of control shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_threads__read;
      counter->name = "TES Threads Dispatched";
      counter->desc = "The total number of evaluation shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__hs_eu_active__read;
      counter->name = "TCS EU Active";
      counter->desc = "The percentage of time in which control shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuActive";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_eu_active_per_thread__read;
      counter->name = "TCS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuActivePerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__hs_eu_stall__read;
      counter->name = "TCS EU Stall";
      counter->desc = "The percentage of time in which control shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "HsEuStall";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_eu_stall_per_thread__read;
      counter->name = "TCS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which control shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "HsEuStallPerThread";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ds_eu_active__read;
      counter->name = "TES EU Active";
      counter->desc = "The percentage of time in which evaluation shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuActive";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_eu_active_per_thread__read;
      counter->name = "TES AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuActivePerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ds_eu_stall__read;
      counter->name = "TES EU Stall";
      counter->desc = "The percentage of time in which evaluation shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "DsEuStall";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_eu_stall_per_thread__read;
      counter->name = "TES AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which evaluation shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "DsEuStallPerThread";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gs_eu_active__read;
      counter->name = "GS EU Active";
      counter->desc = "The percentage of time in which geometry shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_eu_active_per_thread__read;
      counter->name = "GS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuActivePerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__gs_eu_stall__read;
      counter->name = "GS EU Stall";
      counter->desc = "The percentage of time in which geometry shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "GsEuStall";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_eu_stall_per_thread__read;
      counter->name = "GS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which geometry shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "GsEuStallPerThread";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__cs_eu_active__read;
      counter->name = "CS EU Active";
      counter->desc = "The percentage of time in which compute shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuActive";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_eu_active_per_thread__read;
      counter->name = "CS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuActivePerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__cs_eu_stall__read;
      counter->name = "CS EU Stall";
      counter->desc = "The percentage of time in which compute shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "CsEuStall";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_eu_stall_per_thread__read;
      counter->name = "CS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which compute shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "CsEuStallPerThread";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__alpha_test_fails__read;
      counter->name = "Alpha Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS alpha test. Unit: pixels.";
      counter->symbol_name = "AlphaTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__post_ps_stencil_test_fails__read;
      counter->name = "Late Stencil Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS stencil test. Unit: pixels.";
      counter->symbol_name = "PostPsStencilTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__post_ps_depth_test_fails__read;
      counter->name = "Late Depth Test Fails";
      counter->desc = "The total number of pixels dropped on post-FS depth test. Unit: pixels.";
      counter->symbol_name = "PostPsDepthTestFails";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ps_duration__read;
      counter->name = "FS Duration";
      counter->desc = "Total Fragment Shader GPU duration. Unit: us.";
      counter->symbol_name = "PsDuration";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__vs_duration__read;
      counter->name = "VS Duration";
      counter->desc = "Total Vertex Shader GPU duration. Unit: us.";
      counter->symbol_name = "VsDuration";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__gs_duration__read;
      counter->name = "GS Duration";
      counter->desc = "Total Geometry Shader GPU duration. Unit: us.";
      counter->symbol_name = "GsDuration";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__ds_duration__read;
      counter->name = "TES Duration";
      counter->desc = "Total Evaluation Shader GPU duration. Unit: us.";
      counter->symbol_name = "DsDuration";
      counter->category = "EU Array/Evaluation Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__hs_duration__read;
      counter->name = "TCS Duration";
      counter->desc = "Total Control Shader GPU duration. Unit: us.";
      counter->symbol_name = "HsDuration";
      counter->category = "EU Array/Control Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__cs_duration__read;
      counter->name = "CS Duration";
      counter->desc = "Total Compute Shader GPU duration. Unit: us.";
      counter->symbol_name = "CsDuration";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_US;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 376;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = hsw__sampler_balance__eu_idle__read;
      counter->name = "EU Idle";
      counter->desc = "The percentage of time in which the Execution Units were idle. Unit: percent.";
      counter->symbol_name = "EuIdle";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 384;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler_l2_cache_misses__read;
      counter->name = "Sampler L2 cache misses";
      counter->desc = "Number of sampler L2 cache misses Unit: messages.";
      counter->symbol_name = "SamplerL2CacheMisses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 392;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler0_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss0)";
         counter->desc = "Number of sampler L2 cache misses (ss0) Unit: messages.";
         counter->symbol_name = "Sampler0L2CacheMisses";
         counter->category = "Sampler/Sampler Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 400;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler1_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss1)";
         counter->desc = "Number of sampler L2 cache misses (ss1) Unit: messages.";
         counter->symbol_name = "Sampler1L2CacheMisses";
         counter->category = "Sampler/Sampler Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 408;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler2_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss2)";
         counter->desc = "Number of sampler L2 cache misses (ss2) Unit: messages.";
         counter->symbol_name = "Sampler2L2CacheMisses";
         counter->category = "Sampler/Sampler Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 416;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__sampler3_l2_cache_misses__read;
         counter->name = "Sampler L2 cache misses (ss3)";
         counter->desc = "Number of sampler L2 cache misses (ss3) Unit: messages.";
         counter->symbol_name = "Sampler3L2CacheMisses";
         counter->category = "Sampler/Sampler Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 424;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__llc_accesses__read;
         counter->name = "LLC GPU Accesses";
         counter->desc = "The total number of LLC cache lookups done from the GPU (64b reads, 32B writes). Unit: messages.";
         counter->symbol_name = "LlcAccesses";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 432;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = hsw__sampler_balance__llc_hits__read;
         counter->name = "LLC GPU Hits";
         counter->desc = "The total number of successful LLC cache lookups done from the GPU. Unit: messages.";
         counter->symbol_name = "LlcHits";
         counter->category = "LLC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 440;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_hsw(struct intel_perf_config *perf)
{
   hsw_register_render_basic_counter_query(perf);
   hsw_register_compute_basic_counter_query(perf);
   hsw_register_compute_extended_counter_query(perf);
   hsw_register_memory_reads_counter_query(perf);
   hsw_register_memory_writes_counter_query(perf);
   hsw_register_sampler_balance_counter_query(perf);
}


static void
bdw_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "b541bd57-0e0f-4154-b4c0-5858010a2bf7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.slice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x143F000F },
            { .reg = 0x00009888, .val = 0x14110014 },
            { .reg = 0x00009888, .val = 0x14310014 },
            { .reg = 0x00009888, .val = 0x14BF000F },
            { .reg = 0x00009888, .val = 0x118A0317 },
            { .reg = 0x00009888, .val = 0x13837BE0 },
            { .reg = 0x00009888, .val = 0x3B800060 },
            { .reg = 0x00009888, .val = 0x3D800005 },
            { .reg = 0x00009888, .val = 0x005C4000 },
            { .reg = 0x00009888, .val = 0x065C8000 },
            { .reg = 0x00009888, .val = 0x085CC000 },
            { .reg = 0x00009888, .val = 0x003D8000 },
            { .reg = 0x00009888, .val = 0x183D0800 },
            { .reg = 0x00009888, .val = 0x0A3F0023 },
            { .reg = 0x00009888, .val = 0x103F0000 },
            { .reg = 0x00009888, .val = 0x00584000 },
            { .reg = 0x00009888, .val = 0x08584000 },
            { .reg = 0x00009888, .val = 0x0A5A4000 },
            { .reg = 0x00009888, .val = 0x005B4000 },
            { .reg = 0x00009888, .val = 0x0E5B8000 },
            { .reg = 0x00009888, .val = 0x185B2400 },
            { .reg = 0x00009888, .val = 0x0A1D4000 },
            { .reg = 0x00009888, .val = 0x0C1F0800 },
            { .reg = 0x00009888, .val = 0x0E1FAA00 },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x0E384000 },
            { .reg = 0x00009888, .val = 0x16384000 },
            { .reg = 0x00009888, .val = 0x18380001 },
            { .reg = 0x00009888, .val = 0x00392000 },
            { .reg = 0x00009888, .val = 0x06398000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A391000 },
            { .reg = 0x00009888, .val = 0x00104000 },
            { .reg = 0x00009888, .val = 0x08104000 },
            { .reg = 0x00009888, .val = 0x00110030 },
            { .reg = 0x00009888, .val = 0x08110031 },
            { .reg = 0x00009888, .val = 0x10110000 },
            { .reg = 0x00009888, .val = 0x00134000 },
            { .reg = 0x00009888, .val = 0x16130020 },
            { .reg = 0x00009888, .val = 0x06308000 },
            { .reg = 0x00009888, .val = 0x08308000 },
            { .reg = 0x00009888, .val = 0x06311800 },
            { .reg = 0x00009888, .val = 0x08311880 },
            { .reg = 0x00009888, .val = 0x10310000 },
            { .reg = 0x00009888, .val = 0x0E334000 },
            { .reg = 0x00009888, .val = 0x16330080 },
            { .reg = 0x00009888, .val = 0x0ABF1180 },
            { .reg = 0x00009888, .val = 0x10BF0000 },
            { .reg = 0x00009888, .val = 0x0ADA8000 },
            { .reg = 0x00009888, .val = 0x0A9D8000 },
            { .reg = 0x00009888, .val = 0x109F0002 },
            { .reg = 0x00009888, .val = 0x0AB94000 },
            { .reg = 0x00009888, .val = 0x0D888000 },
            { .reg = 0x00009888, .val = 0x038A0380 },
            { .reg = 0x00009888, .val = 0x058A000E },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8A00A0 },
            { .reg = 0x00009888, .val = 0x078A0000 },
            { .reg = 0x00009888, .val = 0x098A0000 },
            { .reg = 0x00009888, .val = 0x238B2820 },
            { .reg = 0x00009888, .val = 0x258B2550 },
            { .reg = 0x00009888, .val = 0x198C1000 },
            { .reg = 0x00009888, .val = 0x0B8D8000 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA0 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x0D831021 },
            { .reg = 0x00009888, .val = 0x0F83572F },
            { .reg = 0x00009888, .val = 0x01835680 },
            { .reg = 0x00009888, .val = 0x0383002C },
            { .reg = 0x00009888, .val = 0x11830000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830001 },
            { .reg = 0x00009888, .val = 0x05830000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x05844000 },
            { .reg = 0x00009888, .val = 0x1B80C137 },
            { .reg = 0x00009888, .val = 0x1D80C147 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x15804000 },
            { .reg = 0x00009888, .val = 0x4D801110 },
            { .reg = 0x00009888, .val = 0x4F800331 },
            { .reg = 0x00009888, .val = 0x43800802 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45801465 },
            { .reg = 0x00009888, .val = 0x53801111 },
            { .reg = 0x00009888, .val = 0x478014A5 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800CA5 },
            { .reg = 0x00009888, .val = 0x41800003 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x143F000F },
            { .reg = 0x00009888, .val = 0x14BF000F },
            { .reg = 0x00009888, .val = 0x14910014 },
            { .reg = 0x00009888, .val = 0x14B10014 },
            { .reg = 0x00009888, .val = 0x118A0317 },
            { .reg = 0x00009888, .val = 0x13837BE0 },
            { .reg = 0x00009888, .val = 0x3B800060 },
            { .reg = 0x00009888, .val = 0x3D800005 },
            { .reg = 0x00009888, .val = 0x0A3F0023 },
            { .reg = 0x00009888, .val = 0x103F0000 },
            { .reg = 0x00009888, .val = 0x0A5A4000 },
            { .reg = 0x00009888, .val = 0x0A1D4000 },
            { .reg = 0x00009888, .val = 0x0E1F8000 },
            { .reg = 0x00009888, .val = 0x0A391000 },
            { .reg = 0x00009888, .val = 0x00DC4000 },
            { .reg = 0x00009888, .val = 0x06DC8000 },
            { .reg = 0x00009888, .val = 0x08DCC000 },
            { .reg = 0x00009888, .val = 0x00BD8000 },
            { .reg = 0x00009888, .val = 0x18BD0800 },
            { .reg = 0x00009888, .val = 0x0ABF1180 },
            { .reg = 0x00009888, .val = 0x10BF0000 },
            { .reg = 0x00009888, .val = 0x00D84000 },
            { .reg = 0x00009888, .val = 0x08D84000 },
            { .reg = 0x00009888, .val = 0x0ADA8000 },
            { .reg = 0x00009888, .val = 0x00DB4000 },
            { .reg = 0x00009888, .val = 0x0EDB8000 },
            { .reg = 0x00009888, .val = 0x18DB2400 },
            { .reg = 0x00009888, .val = 0x0A9D8000 },
            { .reg = 0x00009888, .val = 0x0C9F0800 },
            { .reg = 0x00009888, .val = 0x0E9F2A00 },
            { .reg = 0x00009888, .val = 0x109F0002 },
            { .reg = 0x00009888, .val = 0x00B84000 },
            { .reg = 0x00009888, .val = 0x0EB84000 },
            { .reg = 0x00009888, .val = 0x16B84000 },
            { .reg = 0x00009888, .val = 0x18B80001 },
            { .reg = 0x00009888, .val = 0x00B92000 },
            { .reg = 0x00009888, .val = 0x06B98000 },
            { .reg = 0x00009888, .val = 0x08B9A000 },
            { .reg = 0x00009888, .val = 0x0AB94000 },
            { .reg = 0x00009888, .val = 0x00904000 },
            { .reg = 0x00009888, .val = 0x08904000 },
            { .reg = 0x00009888, .val = 0x00910030 },
            { .reg = 0x00009888, .val = 0x08910031 },
            { .reg = 0x00009888, .val = 0x10910000 },
            { .reg = 0x00009888, .val = 0x00934000 },
            { .reg = 0x00009888, .val = 0x16930020 },
            { .reg = 0x00009888, .val = 0x06B08000 },
            { .reg = 0x00009888, .val = 0x08B08000 },
            { .reg = 0x00009888, .val = 0x06B11800 },
            { .reg = 0x00009888, .val = 0x08B11880 },
            { .reg = 0x00009888, .val = 0x10B10000 },
            { .reg = 0x00009888, .val = 0x0EB34000 },
            { .reg = 0x00009888, .val = 0x16B30080 },
            { .reg = 0x00009888, .val = 0x01888000 },
            { .reg = 0x00009888, .val = 0x0D88B800 },
            { .reg = 0x00009888, .val = 0x038A0380 },
            { .reg = 0x00009888, .val = 0x058A000E },
            { .reg = 0x00009888, .val = 0x1B8A0080 },
            { .reg = 0x00009888, .val = 0x078A0000 },
            { .reg = 0x00009888, .val = 0x098A0000 },
            { .reg = 0x00009888, .val = 0x238B2840 },
            { .reg = 0x00009888, .val = 0x258B26A0 },
            { .reg = 0x00009888, .val = 0x018C4000 },
            { .reg = 0x00009888, .val = 0x0F8C4000 },
            { .reg = 0x00009888, .val = 0x178C2000 },
            { .reg = 0x00009888, .val = 0x198C1100 },
            { .reg = 0x00009888, .val = 0x018D2000 },
            { .reg = 0x00009888, .val = 0x078D8000 },
            { .reg = 0x00009888, .val = 0x098DA000 },
            { .reg = 0x00009888, .val = 0x0B8D8000 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA0 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x0D831021 },
            { .reg = 0x00009888, .val = 0x0F83572F },
            { .reg = 0x00009888, .val = 0x01835680 },
            { .reg = 0x00009888, .val = 0x0383002C },
            { .reg = 0x00009888, .val = 0x11830000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830001 },
            { .reg = 0x00009888, .val = 0x05830000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x05844000 },
            { .reg = 0x00009888, .val = 0x1B80C137 },
            { .reg = 0x00009888, .val = 0x1D80C147 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x15804000 },
            { .reg = 0x00009888, .val = 0x4D801550 },
            { .reg = 0x00009888, .val = 0x4F800331 },
            { .reg = 0x00009888, .val = 0x43800802 },
            { .reg = 0x00009888, .val = 0x51800400 },
            { .reg = 0x00009888, .val = 0x458004A1 },
            { .reg = 0x00009888, .val = 0x53805555 },
            { .reg = 0x00009888, .val = 0x47800421 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F801421 },
            { .reg = 0x00009888, .val = 0x41800845 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "35fbc9b2-a891-40a6-a38d-022bb7057552";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.slice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x105C00E0 },
            { .reg = 0x00009888, .val = 0x105800E0 },
            { .reg = 0x00009888, .val = 0x103800E0 },
            { .reg = 0x00009888, .val = 0x3580001A },
            { .reg = 0x00009888, .val = 0x3B800060 },
            { .reg = 0x00009888, .val = 0x3D800005 },
            { .reg = 0x00009888, .val = 0x065C2100 },
            { .reg = 0x00009888, .val = 0x0A5C0041 },
            { .reg = 0x00009888, .val = 0x0C5C6600 },
            { .reg = 0x00009888, .val = 0x005C6580 },
            { .reg = 0x00009888, .val = 0x085C8000 },
            { .reg = 0x00009888, .val = 0x0E5C8000 },
            { .reg = 0x00009888, .val = 0x00580042 },
            { .reg = 0x00009888, .val = 0x08582080 },
            { .reg = 0x00009888, .val = 0x0C58004C },
            { .reg = 0x00009888, .val = 0x0E582580 },
            { .reg = 0x00009888, .val = 0x005B4000 },
            { .reg = 0x00009888, .val = 0x185B1000 },
            { .reg = 0x00009888, .val = 0x1A5B0104 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAA00 },
            { .reg = 0x00009888, .val = 0x101F02AA },
            { .reg = 0x00009888, .val = 0x08380042 },
            { .reg = 0x00009888, .val = 0x0A382080 },
            { .reg = 0x00009888, .val = 0x0E38404C },
            { .reg = 0x00009888, .val = 0x0238404B },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x16380000 },
            { .reg = 0x00009888, .val = 0x18381145 },
            { .reg = 0x00009888, .val = 0x04380000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x06398000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0C39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x02392000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8AAAA0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x238B02A0 },
            { .reg = 0x00009888, .val = 0x258B5550 },
            { .reg = 0x00009888, .val = 0x278B0015 },
            { .reg = 0x00009888, .val = 0x1F850A80 },
            { .reg = 0x00009888, .val = 0x2185AAA0 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x03844000 },
            { .reg = 0x00009888, .val = 0x17808137 },
            { .reg = 0x00009888, .val = 0x1980C147 },
            { .reg = 0x00009888, .val = 0x1B80C0E5 },
            { .reg = 0x00009888, .val = 0x1D80C0E3 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x13804000 },
            { .reg = 0x00009888, .val = 0x15800000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D801000 },
            { .reg = 0x00009888, .val = 0x4F800111 },
            { .reg = 0x00009888, .val = 0x43800062 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800062 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800062 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F801062 },
            { .reg = 0x00009888, .val = 0x41801084 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x10DC00E0 },
            { .reg = 0x00009888, .val = 0x10D800E0 },
            { .reg = 0x00009888, .val = 0x10B800E0 },
            { .reg = 0x00009888, .val = 0x3580001A },
            { .reg = 0x00009888, .val = 0x3B800060 },
            { .reg = 0x00009888, .val = 0x3D800005 },
            { .reg = 0x00009888, .val = 0x06DC2100 },
            { .reg = 0x00009888, .val = 0x0ADC0041 },
            { .reg = 0x00009888, .val = 0x0CDC6600 },
            { .reg = 0x00009888, .val = 0x00DC6580 },
            { .reg = 0x00009888, .val = 0x08DC8000 },
            { .reg = 0x00009888, .val = 0x0EDC8000 },
            { .reg = 0x00009888, .val = 0x00D80042 },
            { .reg = 0x00009888, .val = 0x08D82080 },
            { .reg = 0x00009888, .val = 0x0CD8004C },
            { .reg = 0x00009888, .val = 0x0ED82580 },
            { .reg = 0x00009888, .val = 0x00DB4000 },
            { .reg = 0x00009888, .val = 0x18DB1000 },
            { .reg = 0x00009888, .val = 0x1ADB0104 },
            { .reg = 0x00009888, .val = 0x0C9FA800 },
            { .reg = 0x00009888, .val = 0x0E9FAA00 },
            { .reg = 0x00009888, .val = 0x109F02AA },
            { .reg = 0x00009888, .val = 0x08B80042 },
            { .reg = 0x00009888, .val = 0x0AB82080 },
            { .reg = 0x00009888, .val = 0x0EB8404C },
            { .reg = 0x00009888, .val = 0x02B8404B },
            { .reg = 0x00009888, .val = 0x00B84000 },
            { .reg = 0x00009888, .val = 0x16B80000 },
            { .reg = 0x00009888, .val = 0x18B81145 },
            { .reg = 0x00009888, .val = 0x04B80000 },
            { .reg = 0x00009888, .val = 0x00B9A000 },
            { .reg = 0x00009888, .val = 0x06B98000 },
            { .reg = 0x00009888, .val = 0x08B9A000 },
            { .reg = 0x00009888, .val = 0x0AB9A000 },
            { .reg = 0x00009888, .val = 0x0CB9A000 },
            { .reg = 0x00009888, .val = 0x0EB9A000 },
            { .reg = 0x00009888, .val = 0x02B92000 },
            { .reg = 0x00009888, .val = 0x01888000 },
            { .reg = 0x00009888, .val = 0x0D88F800 },
            { .reg = 0x00009888, .val = 0x0F88000F },
            { .reg = 0x00009888, .val = 0x03888000 },
            { .reg = 0x00009888, .val = 0x05888000 },
            { .reg = 0x00009888, .val = 0x238B0540 },
            { .reg = 0x00009888, .val = 0x258BAAA0 },
            { .reg = 0x00009888, .val = 0x278B002A },
            { .reg = 0x00009888, .val = 0x018C4000 },
            { .reg = 0x00009888, .val = 0x0F8C4000 },
            { .reg = 0x00009888, .val = 0x178C2000 },
            { .reg = 0x00009888, .val = 0x198C5500 },
            { .reg = 0x00009888, .val = 0x1B8C0015 },
            { .reg = 0x00009888, .val = 0x038C4000 },
            { .reg = 0x00009888, .val = 0x058C4000 },
            { .reg = 0x00009888, .val = 0x018DA000 },
            { .reg = 0x00009888, .val = 0x078D8000 },
            { .reg = 0x00009888, .val = 0x098DA000 },
            { .reg = 0x00009888, .val = 0x0B8DA000 },
            { .reg = 0x00009888, .val = 0x0D8DA000 },
            { .reg = 0x00009888, .val = 0x0F8DA000 },
            { .reg = 0x00009888, .val = 0x038D2000 },
            { .reg = 0x00009888, .val = 0x1F850A80 },
            { .reg = 0x00009888, .val = 0x2185AAA0 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x03844000 },
            { .reg = 0x00009888, .val = 0x17808137 },
            { .reg = 0x00009888, .val = 0x1980C147 },
            { .reg = 0x00009888, .val = 0x1B80C0E5 },
            { .reg = 0x00009888, .val = 0x1D80C0E3 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x13804000 },
            { .reg = 0x00009888, .val = 0x15800000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D805000 },
            { .reg = 0x00009888, .val = 0x4F800555 },
            { .reg = 0x00009888, .val = 0x43800062 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800062 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800062 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800062 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of typed memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "233d0544-fff7-4281-8291-e02f222aff72";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

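   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe the register
    * programming below with static const tables shared by every
    * registration. The query itself is allocated at the top of this
    * function, so the data_size check that follows is expected to pass
    * for a freshly allocated query (assuming rzalloc() returns zeroed
    * memory)... */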

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x0A1E0000 },
         { .reg = 0x00009888, .val = 0x0C1F000F },
         { .reg = 0x00009888, .val = 0x10176800 },
         { .reg = 0x00009888, .val = 0x1191001F },
         { .reg = 0x00009888, .val = 0x0B880320 },
         { .reg = 0x00009888, .val = 0x01890C40 },
         { .reg = 0x00009888, .val = 0x118A1C00 },
         { .reg = 0x00009888, .val = 0x118D7C00 },
         { .reg = 0x00009888, .val = 0x118E0020 },
         { .reg = 0x00009888, .val = 0x118F4C00 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x13900001 },
         { .reg = 0x00009888, .val = 0x065C4000 },
         { .reg = 0x00009888, .val = 0x0C3D8000 },
         { .reg = 0x00009888, .val = 0x06584000 },
         { .reg = 0x00009888, .val = 0x0C5B4000 },
         { .reg = 0x00009888, .val = 0x081E0040 },
         { .reg = 0x00009888, .val = 0x0E1E0000 },
         { .reg = 0x00009888, .val = 0x021F5400 },
         { .reg = 0x00009888, .val = 0x001F0000 },
         { .reg = 0x00009888, .val = 0x101F0010 },
         { .reg = 0x00009888, .val = 0x0E1F0080 },
         { .reg = 0x00009888, .val = 0x0C384000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0C13C000 },
         { .reg = 0x00009888, .val = 0x06164000 },
         { .reg = 0x00009888, .val = 0x06170012 },
         { .reg = 0x00009888, .val = 0x00170000 },
         { .reg = 0x00009888, .val = 0x01910005 },
         { .reg = 0x00009888, .val = 0x07880002 },
         { .reg = 0x00009888, .val = 0x01880C00 },
         { .reg = 0x00009888, .val = 0x0F880000 },
         { .reg = 0x00009888, .val = 0x0D880000 },
         { .reg = 0x00009888, .val = 0x05880000 },
         { .reg = 0x00009888, .val = 0x09890032 },
         { .reg = 0x00009888, .val = 0x078A0800 },
         { .reg = 0x00009888, .val = 0x0F8A0A00 },
         { .reg = 0x00009888, .val = 0x198A4000 },
         { .reg = 0x00009888, .val = 0x1B8A2000 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x038A4000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x238B54C0 },
         { .reg = 0x00009888, .val = 0x258BAA55 },
         { .reg = 0x00009888, .val = 0x278B0019 },
         { .reg = 0x00009888, .val = 0x198C0100 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x0F8D0015 },
         { .reg = 0x00009888, .val = 0x018D1000 },
         { .reg = 0x00009888, .val = 0x098D8000 },
         { .reg = 0x00009888, .val = 0x0B8DF000 },
         { .reg = 0x00009888, .val = 0x0D8D3000 },
         { .reg = 0x00009888, .val = 0x038DE000 },
         { .reg = 0x00009888, .val = 0x058D3000 },
         { .reg = 0x00009888, .val = 0x0D8E0004 },
         { .reg = 0x00009888, .val = 0x058E000C },
         { .reg = 0x00009888, .val = 0x098E0000 },
         { .reg = 0x00009888, .val = 0x078E0000 },
         { .reg = 0x00009888, .val = 0x038E0000 },
         { .reg = 0x00009888, .val = 0x0B8F0020 },
         { .reg = 0x00009888, .val = 0x198F0C00 },
         { .reg = 0x00009888, .val = 0x078F8000 },
         { .reg = 0x00009888, .val = 0x098F4000 },
         { .reg = 0x00009888, .val = 0x0B900980 },
         { .reg = 0x00009888, .val = 0x03900D80 },
         { .reg = 0x00009888, .val = 0x01900000 },
         { .reg = 0x00009888, .val = 0x1F85AA80 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x01834000 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0184C000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1180C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00000D24, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x4D801111 },
         { .reg = 0x00009888, .val = 0x3D800800 },
         { .reg = 0x00009888, .val = 0x4F801011 },
         { .reg = 0x00009888, .val = 0x43800443 },
         { .reg = 0x00009888, .val = 0x51801111 },
         { .reg = 0x00009888, .val = 0x45800422 },
         { .reg = 0x00009888, .val = 0x53801111 },
         { .reg = 0x00009888, .val = 0x47800C60 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800422 },
         { .reg = 0x00009888, .val = 0x41800021 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 276;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metric set";
   query->symbol_name = "MemoryReads";
   query->guid = "2b255d48-2117-4fef-a8f7-f151e1d25a2c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the description below is only filled
    * in while query->data_size is still zero. The query itself is
    * allocated with rzalloc() above rather than kept in a global
    * variable. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x198B0343 },
         { .reg = 0x00009888, .val = 0x13845800 },
         { .reg = 0x00009888, .val = 0x15840018 },
         { .reg = 0x00009888, .val = 0x3580001A },
         { .reg = 0x00009888, .val = 0x038B6300 },
         { .reg = 0x00009888, .val = 0x058B6B62 },
         { .reg = 0x00009888, .val = 0x078B006A },
         { .reg = 0x00009888, .val = 0x118B0000 },
         { .reg = 0x00009888, .val = 0x238B0000 },
         { .reg = 0x00009888, .val = 0x258B0000 },
         { .reg = 0x00009888, .val = 0x1F85A080 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385000A },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x01840018 },
         { .reg = 0x00009888, .val = 0x07844C80 },
         { .reg = 0x00009888, .val = 0x09840D9A },
         { .reg = 0x00009888, .val = 0x0B840E9C },
         { .reg = 0x00009888, .val = 0x0D840F9E },
         { .reg = 0x00009888, .val = 0x0F840010 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x03848000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x2F8000E5 },
         { .reg = 0x00009888, .val = 0x138080E3 },
         { .reg = 0x00009888, .val = 0x1580C0E1 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x11804000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F804000 },
         { .reg = 0x00000D24, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3D800800 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800842 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800842 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801042 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800084 },
         { .reg = 0x00009888, .val = 0x41800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 320;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metric set";
   query->symbol_name = "MemoryWrites";
   query->guid = "f7fd3220-b466-4a4d-9f98-b0caf3f2394c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x198B0343 },
         { .reg = 0x00009888, .val = 0x13845400 },
         { .reg = 0x00009888, .val = 0x3580001A },
         { .reg = 0x00009888, .val = 0x3D800805 },
         { .reg = 0x00009888, .val = 0x038B6300 },
         { .reg = 0x00009888, .val = 0x058B6B62 },
         { .reg = 0x00009888, .val = 0x078B006A },
         { .reg = 0x00009888, .val = 0x118B0000 },
         { .reg = 0x00009888, .val = 0x238B0000 },
         { .reg = 0x00009888, .val = 0x258B0000 },
         { .reg = 0x00009888, .val = 0x1F85A080 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x23850002 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x01840010 },
         { .reg = 0x00009888, .val = 0x07844880 },
         { .reg = 0x00009888, .val = 0x09840992 },
         { .reg = 0x00009888, .val = 0x0B840A94 },
         { .reg = 0x00009888, .val = 0x0D840B96 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x03848000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x2D800147 },
         { .reg = 0x00009888, .val = 0x2F8000E5 },
         { .reg = 0x00009888, .val = 0x138080E3 },
         { .reg = 0x00009888, .val = 0x1580C0E1 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x11804000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F800000 },
         { .reg = 0x00000D24, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800842 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800842 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801082 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800084 },
         { .reg = 0x00009888, .val = 0x41800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended metric set";
   query->symbol_name = "ComputeExtended";
   query->guid = "e99ccaca-821c-4df9-97a7-96bdb7204e43";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.subslice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x143D0160 },
            { .reg = 0x00009888, .val = 0x163D2800 },
            { .reg = 0x00009888, .val = 0x183D0120 },
            { .reg = 0x00009888, .val = 0x105800E0 },
            { .reg = 0x00009888, .val = 0x005CC000 },
            { .reg = 0x00009888, .val = 0x065C8000 },
            { .reg = 0x00009888, .val = 0x085CC000 },
            { .reg = 0x00009888, .val = 0x0A5CC000 },
            { .reg = 0x00009888, .val = 0x0C5CC000 },
            { .reg = 0x00009888, .val = 0x0E5CC000 },
            { .reg = 0x00009888, .val = 0x025CC000 },
            { .reg = 0x00009888, .val = 0x045CC000 },
            { .reg = 0x00009888, .val = 0x003D0011 },
            { .reg = 0x00009888, .val = 0x063D0900 },
            { .reg = 0x00009888, .val = 0x083D0A13 },
            { .reg = 0x00009888, .val = 0x0A3D0B15 },
            { .reg = 0x00009888, .val = 0x0C3D2317 },
            { .reg = 0x00009888, .val = 0x043D21B7 },
            { .reg = 0x00009888, .val = 0x103D0000 },
            { .reg = 0x00009888, .val = 0x0E3D0000 },
            { .reg = 0x00009888, .val = 0x1A3D0000 },
            { .reg = 0x00009888, .val = 0x0E5825C1 },
            { .reg = 0x00009888, .val = 0x00586100 },
            { .reg = 0x00009888, .val = 0x0258204C },
            { .reg = 0x00009888, .val = 0x06588000 },
            { .reg = 0x00009888, .val = 0x0858C000 },
            { .reg = 0x00009888, .val = 0x0A58C000 },
            { .reg = 0x00009888, .val = 0x0C58C000 },
            { .reg = 0x00009888, .val = 0x0458C000 },
            { .reg = 0x00009888, .val = 0x005B4000 },
            { .reg = 0x00009888, .val = 0x0E5B4000 },
            { .reg = 0x00009888, .val = 0x185B5400 },
            { .reg = 0x00009888, .val = 0x1A5B0155 },
            { .reg = 0x00009888, .val = 0x025B4000 },
            { .reg = 0x00009888, .val = 0x045B4000 },
            { .reg = 0x00009888, .val = 0x065B4000 },
            { .reg = 0x00009888, .val = 0x085B4000 },
            { .reg = 0x00009888, .val = 0x0A5B4000 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAA2A },
            { .reg = 0x00009888, .val = 0x101F02AA },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x0E384000 },
            { .reg = 0x00009888, .val = 0x16384000 },
            { .reg = 0x00009888, .val = 0x18381555 },
            { .reg = 0x00009888, .val = 0x02384000 },
            { .reg = 0x00009888, .val = 0x04384000 },
            { .reg = 0x00009888, .val = 0x06384000 },
            { .reg = 0x00009888, .val = 0x08384000 },
            { .reg = 0x00009888, .val = 0x0A384000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x06398000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0C39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x0239A000 },
            { .reg = 0x00009888, .val = 0x0439A000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8AAAA0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x078A8000 },
            { .reg = 0x00009888, .val = 0x098A8000 },
            { .reg = 0x00009888, .val = 0x0B8A8000 },
            { .reg = 0x00009888, .val = 0x238B2AA0 },
            { .reg = 0x00009888, .val = 0x258B5551 },
            { .reg = 0x00009888, .val = 0x278B0015 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.subslice_mask & 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x105C00E0 },
            { .reg = 0x00009888, .val = 0x145B0160 },
            { .reg = 0x00009888, .val = 0x165B2800 },
            { .reg = 0x00009888, .val = 0x185B0120 },
            { .reg = 0x00009888, .val = 0x0E5C25C1 },
            { .reg = 0x00009888, .val = 0x005C6100 },
            { .reg = 0x00009888, .val = 0x025C204C },
            { .reg = 0x00009888, .val = 0x065C8000 },
            { .reg = 0x00009888, .val = 0x085CC000 },
            { .reg = 0x00009888, .val = 0x0A5CC000 },
            { .reg = 0x00009888, .val = 0x0C5CC000 },
            { .reg = 0x00009888, .val = 0x045CC000 },
            { .reg = 0x00009888, .val = 0x005B0011 },
            { .reg = 0x00009888, .val = 0x065B0900 },
            { .reg = 0x00009888, .val = 0x085B0A13 },
            { .reg = 0x00009888, .val = 0x0A5B0B15 },
            { .reg = 0x00009888, .val = 0x0C5B2317 },
            { .reg = 0x00009888, .val = 0x045B21B7 },
            { .reg = 0x00009888, .val = 0x105B0000 },
            { .reg = 0x00009888, .val = 0x0E5B0000 },
            { .reg = 0x00009888, .val = 0x1A5B0000 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAA2A },
            { .reg = 0x00009888, .val = 0x101F02AA },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x0E384000 },
            { .reg = 0x00009888, .val = 0x16384000 },
            { .reg = 0x00009888, .val = 0x18381555 },
            { .reg = 0x00009888, .val = 0x02384000 },
            { .reg = 0x00009888, .val = 0x04384000 },
            { .reg = 0x00009888, .val = 0x06384000 },
            { .reg = 0x00009888, .val = 0x08384000 },
            { .reg = 0x00009888, .val = 0x0A384000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x06398000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0C39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x0239A000 },
            { .reg = 0x00009888, .val = 0x0439A000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8AAAA0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x078A8000 },
            { .reg = 0x00009888, .val = 0x098A8000 },
            { .reg = 0x00009888, .val = 0x0B8A8000 },
            { .reg = 0x00009888, .val = 0x238B2AA0 },
            { .reg = 0x00009888, .val = 0x258B5551 },
            { .reg = 0x00009888, .val = 0x278B0015 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.subslice_mask & 0x04) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x103800E0 },
            { .reg = 0x00009888, .val = 0x143A0160 },
            { .reg = 0x00009888, .val = 0x163A2800 },
            { .reg = 0x00009888, .val = 0x183A0120 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAA2A },
            { .reg = 0x00009888, .val = 0x101F02AA },
            { .reg = 0x00009888, .val = 0x0E38A5C1 },
            { .reg = 0x00009888, .val = 0x0038A100 },
            { .reg = 0x00009888, .val = 0x0238204C },
            { .reg = 0x00009888, .val = 0x16388000 },
            { .reg = 0x00009888, .val = 0x183802AA },
            { .reg = 0x00009888, .val = 0x04380000 },
            { .reg = 0x00009888, .val = 0x06380000 },
            { .reg = 0x00009888, .val = 0x08388000 },
            { .reg = 0x00009888, .val = 0x0A388000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x06398000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0C39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x0239A000 },
            { .reg = 0x00009888, .val = 0x0439A000 },
            { .reg = 0x00009888, .val = 0x003A0011 },
            { .reg = 0x00009888, .val = 0x063A0900 },
            { .reg = 0x00009888, .val = 0x083A0A13 },
            { .reg = 0x00009888, .val = 0x0A3A0B15 },
            { .reg = 0x00009888, .val = 0x0C3A2317 },
            { .reg = 0x00009888, .val = 0x043A21B7 },
            { .reg = 0x00009888, .val = 0x103A0000 },
            { .reg = 0x00009888, .val = 0x0E3A0000 },
            { .reg = 0x00009888, .val = 0x1A3A0000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8AAAA0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x078A8000 },
            { .reg = 0x00009888, .val = 0x098A8000 },
            { .reg = 0x00009888, .val = 0x0B8A8000 },
            { .reg = 0x00009888, .val = 0x238B2AA0 },
            { .reg = 0x00009888, .val = 0x258B5551 },
            { .reg = 0x00009888, .val = 0x278B0015 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.subslice_mask & 0x08) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x14BD0160 },
            { .reg = 0x00009888, .val = 0x16BD2800 },
            { .reg = 0x00009888, .val = 0x18BD0120 },
            { .reg = 0x00009888, .val = 0x10D800E0 },
            { .reg = 0x00009888, .val = 0x00DCC000 },
            { .reg = 0x00009888, .val = 0x06DC8000 },
            { .reg = 0x00009888, .val = 0x08DCC000 },
            { .reg = 0x00009888, .val = 0x0ADCC000 },
            { .reg = 0x00009888, .val = 0x0CDCC000 },
            { .reg = 0x00009888, .val = 0x0EDCC000 },
            { .reg = 0x00009888, .val = 0x02DCC000 },
            { .reg = 0x00009888, .val = 0x04DCC000 },
            { .reg = 0x00009888, .val = 0x00BD0011 },
            { .reg = 0x00009888, .val = 0x06BD0900 },
            { .reg = 0x00009888, .val = 0x08BD0A13 },
            { .reg = 0x00009888, .val = 0x0ABD0B15 },
            { .reg = 0x00009888, .val = 0x0CBD2317 },
            { .reg = 0x00009888, .val = 0x04BD21B7 },
            { .reg = 0x00009888, .val = 0x10BD0000 },
            { .reg = 0x00009888, .val = 0x0EBD0000 },
            { .reg = 0x00009888, .val = 0x1ABD0000 },
            { .reg = 0x00009888, .val = 0x0ED825C1 },
            { .reg = 0x00009888, .val = 0x00D86100 },
            { .reg = 0x00009888, .val = 0x02D8204C },
            { .reg = 0x00009888, .val = 0x06D88000 },
            { .reg = 0x00009888, .val = 0x08D8C000 },
            { .reg = 0x00009888, .val = 0x0AD8C000 },
            { .reg = 0x00009888, .val = 0x0CD8C000 },
            { .reg = 0x00009888, .val = 0x04D8C000 },
            { .reg = 0x00009888, .val = 0x00DB4000 },
            { .reg = 0x00009888, .val = 0x0EDB4000 },
            { .reg = 0x00009888, .val = 0x18DB5400 },
            { .reg = 0x00009888, .val = 0x1ADB0155 },
            { .reg = 0x00009888, .val = 0x02DB4000 },
            { .reg = 0x00009888, .val = 0x04DB4000 },
            { .reg = 0x00009888, .val = 0x06DB4000 },
            { .reg = 0x00009888, .val = 0x08DB4000 },
            { .reg = 0x00009888, .val = 0x0ADB4000 },
            { .reg = 0x00009888, .val = 0x0C9FA800 },
            { .reg = 0x00009888, .val = 0x0E9FAA2A },
            { .reg = 0x00009888, .val = 0x109F02AA },
            { .reg = 0x00009888, .val = 0x00B84000 },
            { .reg = 0x00009888, .val = 0x0EB84000 },
            { .reg = 0x00009888, .val = 0x16B84000 },
            { .reg = 0x00009888, .val = 0x18B81555 },
            { .reg = 0x00009888, .val = 0x02B84000 },
            { .reg = 0x00009888, .val = 0x04B84000 },
            { .reg = 0x00009888, .val = 0x06B84000 },
            { .reg = 0x00009888, .val = 0x08B84000 },
            { .reg = 0x00009888, .val = 0x0AB84000 },
            { .reg = 0x00009888, .val = 0x00B9A000 },
            { .reg = 0x00009888, .val = 0x06B98000 },
            { .reg = 0x00009888, .val = 0x08B9A000 },
            { .reg = 0x00009888, .val = 0x0AB9A000 },
            { .reg = 0x00009888, .val = 0x0CB9A000 },
            { .reg = 0x00009888, .val = 0x0EB9A000 },
            { .reg = 0x00009888, .val = 0x02B9A000 },
            { .reg = 0x00009888, .val = 0x04B9A000 },
            { .reg = 0x00009888, .val = 0x01888000 },
            { .reg = 0x00009888, .val = 0x0D88F800 },
            { .reg = 0x00009888, .val = 0x0F88000F },
            { .reg = 0x00009888, .val = 0x03888000 },
            { .reg = 0x00009888, .val = 0x05888000 },
            { .reg = 0x00009888, .val = 0x07888000 },
            { .reg = 0x00009888, .val = 0x09888000 },
            { .reg = 0x00009888, .val = 0x0B888000 },
            { .reg = 0x00009888, .val = 0x238B5540 },
            { .reg = 0x00009888, .val = 0x258BAAA2 },
            { .reg = 0x00009888, .val = 0x278B002A },
            { .reg = 0x00009888, .val = 0x018C4000 },
            { .reg = 0x00009888, .val = 0x0F8C4000 },
            { .reg = 0x00009888, .val = 0x178C2000 },
            { .reg = 0x00009888, .val = 0x198C5500 },
            { .reg = 0x00009888, .val = 0x1B8C0015 },
            { .reg = 0x00009888, .val = 0x038C4000 },
            { .reg = 0x00009888, .val = 0x058C4000 },
            { .reg = 0x00009888, .val = 0x078C4000 },
            { .reg = 0x00009888, .val = 0x098C4000 },
            { .reg = 0x00009888, .val = 0x0B8C4000 },
            { .reg = 0x00009888, .val = 0x018DA000 },
            { .reg = 0x00009888, .val = 0x078D8000 },
            { .reg = 0x00009888, .val = 0x098DA000 },
            { .reg = 0x00009888, .val = 0x0B8DA000 },
            { .reg = 0x00009888, .val = 0x0D8DA000 },
            { .reg = 0x00009888, .val = 0x0F8DA000 },
            { .reg = 0x00009888, .val = 0x038DA000 },
            { .reg = 0x00009888, .val = 0x058DA000 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      /* MUX register program selected when bit 0x10 of the subslice mask is
       * set.  This is a plain `if`, not an `else if`: when it matches, it
       * replaces whatever mux_regs/n_mux_regs an earlier mask check already
       * stored in query->config.
       *
       * The table is bracketed by writes to 0x9840 (0xA0 first, 0x80 last);
       * apart from one write to 0x0D24 every other entry targets 0x9888.
       * NOTE(review): entries are presumably applied in array order -- confirm
       * against the code that consumes query->config.mux_regs.
       */
      if (perf->sys_vars.subslice_mask & 0x10) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x10DC00E0 },
            { .reg = 0x00009888, .val = 0x14DB0160 },
            { .reg = 0x00009888, .val = 0x16DB2800 },
            { .reg = 0x00009888, .val = 0x18DB0120 },
            { .reg = 0x00009888, .val = 0x0EDC25C1 },
            { .reg = 0x00009888, .val = 0x00DC6100 },
            { .reg = 0x00009888, .val = 0x02DC204C },
            { .reg = 0x00009888, .val = 0x06DC8000 },
            { .reg = 0x00009888, .val = 0x08DCC000 },
            { .reg = 0x00009888, .val = 0x0ADCC000 },
            { .reg = 0x00009888, .val = 0x0CDCC000 },
            { .reg = 0x00009888, .val = 0x04DCC000 },
            { .reg = 0x00009888, .val = 0x00DB0011 },
            { .reg = 0x00009888, .val = 0x06DB0900 },
            { .reg = 0x00009888, .val = 0x08DB0A13 },
            { .reg = 0x00009888, .val = 0x0ADB0B15 },
            { .reg = 0x00009888, .val = 0x0CDB2317 },
            { .reg = 0x00009888, .val = 0x04DB21B7 },
            { .reg = 0x00009888, .val = 0x10DB0000 },
            { .reg = 0x00009888, .val = 0x0EDB0000 },
            { .reg = 0x00009888, .val = 0x1ADB0000 },
            { .reg = 0x00009888, .val = 0x0C9FA800 },
            { .reg = 0x00009888, .val = 0x0E9FAA2A },
            { .reg = 0x00009888, .val = 0x109F02AA },
            { .reg = 0x00009888, .val = 0x00B84000 },
            { .reg = 0x00009888, .val = 0x0EB84000 },
            { .reg = 0x00009888, .val = 0x16B84000 },
            { .reg = 0x00009888, .val = 0x18B81555 },
            { .reg = 0x00009888, .val = 0x02B84000 },
            { .reg = 0x00009888, .val = 0x04B84000 },
            { .reg = 0x00009888, .val = 0x06B84000 },
            { .reg = 0x00009888, .val = 0x08B84000 },
            { .reg = 0x00009888, .val = 0x0AB84000 },
            { .reg = 0x00009888, .val = 0x00B9A000 },
            { .reg = 0x00009888, .val = 0x06B98000 },
            { .reg = 0x00009888, .val = 0x08B9A000 },
            { .reg = 0x00009888, .val = 0x0AB9A000 },
            { .reg = 0x00009888, .val = 0x0CB9A000 },
            { .reg = 0x00009888, .val = 0x0EB9A000 },
            { .reg = 0x00009888, .val = 0x02B9A000 },
            { .reg = 0x00009888, .val = 0x04B9A000 },
            { .reg = 0x00009888, .val = 0x01888000 },
            { .reg = 0x00009888, .val = 0x0D88F800 },
            { .reg = 0x00009888, .val = 0x0F88000F },
            { .reg = 0x00009888, .val = 0x03888000 },
            { .reg = 0x00009888, .val = 0x05888000 },
            { .reg = 0x00009888, .val = 0x07888000 },
            { .reg = 0x00009888, .val = 0x09888000 },
            { .reg = 0x00009888, .val = 0x0B888000 },
            { .reg = 0x00009888, .val = 0x238B5540 },
            { .reg = 0x00009888, .val = 0x258BAAA2 },
            { .reg = 0x00009888, .val = 0x278B002A },
            { .reg = 0x00009888, .val = 0x018C4000 },
            { .reg = 0x00009888, .val = 0x0F8C4000 },
            { .reg = 0x00009888, .val = 0x178C2000 },
            { .reg = 0x00009888, .val = 0x198C5500 },
            { .reg = 0x00009888, .val = 0x1B8C0015 },
            { .reg = 0x00009888, .val = 0x038C4000 },
            { .reg = 0x00009888, .val = 0x058C4000 },
            { .reg = 0x00009888, .val = 0x078C4000 },
            { .reg = 0x00009888, .val = 0x098C4000 },
            { .reg = 0x00009888, .val = 0x0B8C4000 },
            { .reg = 0x00009888, .val = 0x018DA000 },
            { .reg = 0x00009888, .val = 0x078D8000 },
            { .reg = 0x00009888, .val = 0x098DA000 },
            { .reg = 0x00009888, .val = 0x0B8DA000 },
            { .reg = 0x00009888, .val = 0x0D8DA000 },
            { .reg = 0x00009888, .val = 0x0F8DA000 },
            { .reg = 0x00009888, .val = 0x038DA000 },
            { .reg = 0x00009888, .val = 0x058DA000 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         /* The table has static storage duration, so the pointer stored here
          * stays valid after this block's scope ends.
          */
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      /* MUX register program selected when bit 0x20 of the subslice mask is
       * set.  Like the preceding mask checks this is an independent `if`, so
       * when it matches it overrides any mux_regs/n_mux_regs stored earlier;
       * no later statement in the visible part of this function reassigns
       * them.
       *
       * The table is bracketed by writes to 0x9840 (0xA0 first, 0x80 last);
       * apart from one write to 0x0D24 every other entry targets 0x9888.
       * NOTE(review): entries are presumably applied in array order -- confirm
       * against the code that consumes query->config.mux_regs.
       */
      if (perf->sys_vars.subslice_mask & 0x20) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x10B800E0 },
            { .reg = 0x00009888, .val = 0x14BA0160 },
            { .reg = 0x00009888, .val = 0x16BA2800 },
            { .reg = 0x00009888, .val = 0x18BA0120 },
            { .reg = 0x00009888, .val = 0x0C9FA800 },
            { .reg = 0x00009888, .val = 0x0E9FAA2A },
            { .reg = 0x00009888, .val = 0x109F02AA },
            { .reg = 0x00009888, .val = 0x0EB8A5C1 },
            { .reg = 0x00009888, .val = 0x00B8A100 },
            { .reg = 0x00009888, .val = 0x02B8204C },
            { .reg = 0x00009888, .val = 0x16B88000 },
            { .reg = 0x00009888, .val = 0x18B802AA },
            { .reg = 0x00009888, .val = 0x04B80000 },
            { .reg = 0x00009888, .val = 0x06B80000 },
            { .reg = 0x00009888, .val = 0x08B88000 },
            { .reg = 0x00009888, .val = 0x0AB88000 },
            { .reg = 0x00009888, .val = 0x00B9A000 },
            { .reg = 0x00009888, .val = 0x06B98000 },
            { .reg = 0x00009888, .val = 0x08B9A000 },
            { .reg = 0x00009888, .val = 0x0AB9A000 },
            { .reg = 0x00009888, .val = 0x0CB9A000 },
            { .reg = 0x00009888, .val = 0x0EB9A000 },
            { .reg = 0x00009888, .val = 0x02B9A000 },
            { .reg = 0x00009888, .val = 0x04B9A000 },
            { .reg = 0x00009888, .val = 0x00BA0011 },
            { .reg = 0x00009888, .val = 0x06BA0900 },
            { .reg = 0x00009888, .val = 0x08BA0A13 },
            { .reg = 0x00009888, .val = 0x0ABA0B15 },
            { .reg = 0x00009888, .val = 0x0CBA2317 },
            { .reg = 0x00009888, .val = 0x04BA21B7 },
            { .reg = 0x00009888, .val = 0x10BA0000 },
            { .reg = 0x00009888, .val = 0x0EBA0000 },
            { .reg = 0x00009888, .val = 0x1ABA0000 },
            { .reg = 0x00009888, .val = 0x01888000 },
            { .reg = 0x00009888, .val = 0x0D88F800 },
            { .reg = 0x00009888, .val = 0x0F88000F },
            { .reg = 0x00009888, .val = 0x03888000 },
            { .reg = 0x00009888, .val = 0x05888000 },
            { .reg = 0x00009888, .val = 0x07888000 },
            { .reg = 0x00009888, .val = 0x09888000 },
            { .reg = 0x00009888, .val = 0x0B888000 },
            { .reg = 0x00009888, .val = 0x238B5540 },
            { .reg = 0x00009888, .val = 0x258BAAA2 },
            { .reg = 0x00009888, .val = 0x278B002A },
            { .reg = 0x00009888, .val = 0x018C4000 },
            { .reg = 0x00009888, .val = 0x0F8C4000 },
            { .reg = 0x00009888, .val = 0x178C2000 },
            { .reg = 0x00009888, .val = 0x198C5500 },
            { .reg = 0x00009888, .val = 0x1B8C0015 },
            { .reg = 0x00009888, .val = 0x038C4000 },
            { .reg = 0x00009888, .val = 0x058C4000 },
            { .reg = 0x00009888, .val = 0x078C4000 },
            { .reg = 0x00009888, .val = 0x098C4000 },
            { .reg = 0x00009888, .val = 0x0B8C4000 },
            { .reg = 0x00009888, .val = 0x018DA000 },
            { .reg = 0x00009888, .val = 0x078D8000 },
            { .reg = 0x00009888, .val = 0x098DA000 },
            { .reg = 0x00009888, .val = 0x0B8DA000 },
            { .reg = 0x00009888, .val = 0x0D8DA000 },
            { .reg = 0x00009888, .val = 0x0F8DA000 },
            { .reg = 0x00009888, .val = 0x038DA000 },
            { .reg = 0x00009888, .val = 0x058DA000 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAA2 },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x07848000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x17808000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D800000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800000 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         /* The table has static storage duration, so the pointer stored here
          * stays valid after this block's scope ends.
          */
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      /* Register/value pairs stored in query->config.b_counter_regs.  Unlike
       * the mux_regs tables, this one is assigned unconditionally: it does not
       * depend on the subslice mask.
       * NOTE(review): "b_counter" presumably refers to the OA boolean
       * counters -- confirm against the hardware documentation.
       */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      /* Static storage duration: the pointer outlives this function call. */
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs stored in query->config.flex_regs, assigned
       * unconditionally (no subslice-mask dependency).
       * NOTE(review): presumably the flexible EU event selection registers --
       * confirm against the hardware documentation.
       */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      /* Static storage duration: the pointer outlives this function call. */
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter registration.  Each stanza claims the next free slot in
       * query->counters (post-incrementing n_counters) and fills in the read
       * callback, the user-visible metadata strings, the type/units enums and
       * the counter's offset.
       *
       * The four counters below are all UINT64 and their offsets step by 8
       * (0, 8, 16, 24).
       * NOTE(review): offset is presumably the byte position of the value in
       * the query's result data -- confirm against the consumer.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike its neighbours, this counter's raw_max is not a literal: it is
       * computed once, here, by a helper that takes the perf config.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      /* EU counters.  All eight are FLOAT, so they install the
       * oa_counter_read_float callback (not the uint64 one) and their offsets
       * step by 4 (32 .. 60).  Seven are percentages with raw_max 100.0;
       * EuAvgIpcRate is the exception (EVENT type, NUMBER units, raw_max 2.0).
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      /* The one non-percentage counter in this group. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      /* Sampler, SLM, L3/data-port and barrier counters.  All are UINT64 with
       * offsets stepping by 8 (64 .. 120).  raw_max is 0 throughout: the
       * inline comments mark it "undefined" for EVENT counters and
       * "unsupported (varies over time)" for THROUGHPUT counters.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      /* EU typed/untyped read and write message counters.  The names and
       * descriptions below all refer to subslice 0, and for these counters
       * the display name is identical to the symbol name.  All are UINT64
       * EVENT counters with offsets continuing in steps of 8 (128 .. 144).
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "Ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "Ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "Ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "Ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "Ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache metric set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "27a364dc-8225-4ecb-b607-d6f1925598d9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 58);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x143F00B3 },
         { .reg = 0x00009888, .val = 0x14BF00B3 },
         { .reg = 0x00009888, .val = 0x138303C0 },
         { .reg = 0x00009888, .val = 0x3B800060 },
         { .reg = 0x00009888, .val = 0x3D800805 },
         { .reg = 0x00009888, .val = 0x003F0029 },
         { .reg = 0x00009888, .val = 0x063F1400 },
         { .reg = 0x00009888, .val = 0x083F1225 },
         { .reg = 0x00009888, .val = 0x0E3F1327 },
         { .reg = 0x00009888, .val = 0x103F0000 },
         { .reg = 0x00009888, .val = 0x005A4000 },
         { .reg = 0x00009888, .val = 0x065A8000 },
         { .reg = 0x00009888, .val = 0x085AC000 },
         { .reg = 0x00009888, .val = 0x0E5AC000 },
         { .reg = 0x00009888, .val = 0x001D4000 },
         { .reg = 0x00009888, .val = 0x061D8000 },
         { .reg = 0x00009888, .val = 0x081DC000 },
         { .reg = 0x00009888, .val = 0x0E1DC000 },
         { .reg = 0x00009888, .val = 0x0C1F0800 },
         { .reg = 0x00009888, .val = 0x0E1F2A00 },
         { .reg = 0x00009888, .val = 0x101F0280 },
         { .reg = 0x00009888, .val = 0x00391000 },
         { .reg = 0x00009888, .val = 0x06394000 },
         { .reg = 0x00009888, .val = 0x08395000 },
         { .reg = 0x00009888, .val = 0x0E395000 },
         { .reg = 0x00009888, .val = 0x0ABF1429 },
         { .reg = 0x00009888, .val = 0x0CBF1225 },
         { .reg = 0x00009888, .val = 0x00BF1380 },
         { .reg = 0x00009888, .val = 0x02BF0026 },
         { .reg = 0x00009888, .val = 0x10BF0000 },
         { .reg = 0x00009888, .val = 0x0ADAC000 },
         { .reg = 0x00009888, .val = 0x0CDAC000 },
         { .reg = 0x00009888, .val = 0x00DA8000 },
         { .reg = 0x00009888, .val = 0x02DA4000 },
         { .reg = 0x00009888, .val = 0x0A9DC000 },
         { .reg = 0x00009888, .val = 0x0C9DC000 },
         { .reg = 0x00009888, .val = 0x009D8000 },
         { .reg = 0x00009888, .val = 0x029D4000 },
         { .reg = 0x00009888, .val = 0x0E9F8000 },
         { .reg = 0x00009888, .val = 0x109F002A },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0AB95000 },
         { .reg = 0x00009888, .val = 0x0CB95000 },
         { .reg = 0x00009888, .val = 0x00B94000 },
         { .reg = 0x00009888, .val = 0x02B91000 },
         { .reg = 0x00009888, .val = 0x0D88C000 },
         { .reg = 0x00009888, .val = 0x0F880003 },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x018A8000 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x198A8000 },
         { .reg = 0x00009888, .val = 0x1B8A8020 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x238B0520 },
         { .reg = 0x00009888, .val = 0x258BA950 },
         { .reg = 0x00009888, .val = 0x278B0016 },
         { .reg = 0x00009888, .val = 0x198C5400 },
         { .reg = 0x00009888, .val = 0x1B8C0001 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038D2000 },
         { .reg = 0x00009888, .val = 0x1F85AA80 },
         { .reg = 0x00009888, .val = 0x2185AAA0 },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x03835180 },
         { .reg = 0x00009888, .val = 0x05834022 },
         { .reg = 0x00009888, .val = 0x11830000 },
         { .reg = 0x00009888, .val = 0x01834000 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x0184C000 },
         { .reg = 0x00009888, .val = 0x07848000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x05844000 },
         { .reg = 0x00009888, .val = 0x1B80C137 },
         { .reg = 0x00009888, .val = 0x1D80C147 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x1180C000 },
         { .reg = 0x00009888, .val = 0x17808000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x15804000 },
         { .reg = 0x00000D24, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x4D801000 },
         { .reg = 0x00009888, .val = 0x4F800111 },
         { .reg = 0x00009888, .val = 0x43800842 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800840 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800800 },
         { .reg = 0x00009888, .val = 0x418014A2 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 288;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 296;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_accesses__read;
         counter->name = "L3 Bank 10 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 10. Unit: messages.";
         counter->symbol_name = "L3Bank10Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank11_accesses__read;
         counter->name = "L3 Bank 11 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 11. Unit: messages.";
         counter->symbol_name = "L3Bank11Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank12_accesses__read;
         counter->name = "L3 Bank 12 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 12. Unit: messages.";
         counter->symbol_name = "L3Bank12Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank13_accesses__read;
         counter->name = "L3 Bank 13 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 13. Unit: messages.";
         counter->symbol_name = "L3Bank13Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 352;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 360;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_ic_accesses__read;
         counter->name = "L3 Bank 10 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 10 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank10IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 368;
      }

      if (perf->sys_vars.slice_mask & 0x02) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bdw__compute_l3_cache__l3_bank10_ic_hits__read;
         counter->name = "L3 Bank 10 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 10 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank10IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 376;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 384;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 392;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 400;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Builds the description of the "Data Port Reads Coalescing metric set" OA
 * query and inserts it into perf->oa_metrics_table, keyed by the query GUID.
 *
 * perf: configuration the query is attached to (stored in query->perf and
 *       passed as the first argument of rzalloc()).  Its
 *       sys_vars.subslice_mask is read to decide whether the MUX register
 *       program is attached to the query.
 *
 * The counter array is allocated with room for 35 entries and exactly 35
 * counters are filled in below.  Neither allocation result is checked before
 * it is dereferenced.
 */
static void
bdw_register_data_port_reads_coalescing_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Data Port Reads Coalescing metric set";
   query->symbol_name = "DataPortReadsCoalescing";
   query->guid = "857fc630-2f09-4804-85f1-084adfadd5ab";
   /* Capacity must stay in sync with the number of counters registered below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 35);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is derived from the previous one: 1 slot for GPU time,
    * 1 for the GPU clock, 36 for the A group, 8 for B, 8 for C and 2 for
    * perfcnt.  NOTE(review): the 36/8/8 split presumably mirrors the
    * A32 + A4 / B8 / C8 layout named by oa_format -- confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query is freshly allocated by rzalloc() above on every
    * call, so data_size is presumably always zero here and this guard is
    * expected to pass each time -- confirm that rzalloc() zero-fills. */
   if (!query->data_size) {
      /* The MUX register program is only attached when bit 0 of the
       * subslice mask is set; otherwise config.mux_regs and
       * config.n_mux_regs are left as allocated.  The table is a list of
       * register/value pairs and must be kept in this order. */
      if (perf->sys_vars.subslice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x103D0005 },
            { .reg = 0x00009888, .val = 0x163D240B },
            { .reg = 0x00009888, .val = 0x1058022F },
            { .reg = 0x00009888, .val = 0x185B5520 },
            { .reg = 0x00009888, .val = 0x198B0003 },
            { .reg = 0x00009888, .val = 0x005CC000 },
            { .reg = 0x00009888, .val = 0x065CC000 },
            { .reg = 0x00009888, .val = 0x085CC000 },
            { .reg = 0x00009888, .val = 0x0A5CC000 },
            { .reg = 0x00009888, .val = 0x0C5CC000 },
            { .reg = 0x00009888, .val = 0x0E5CC000 },
            { .reg = 0x00009888, .val = 0x025C4000 },
            { .reg = 0x00009888, .val = 0x045C8000 },
            { .reg = 0x00009888, .val = 0x003D0000 },
            { .reg = 0x00009888, .val = 0x063D00B0 },
            { .reg = 0x00009888, .val = 0x083D0182 },
            { .reg = 0x00009888, .val = 0x0A3D10A0 },
            { .reg = 0x00009888, .val = 0x0C3D11A2 },
            { .reg = 0x00009888, .val = 0x0E3D0000 },
            { .reg = 0x00009888, .val = 0x183D0000 },
            { .reg = 0x00009888, .val = 0x1A3D0000 },
            { .reg = 0x00009888, .val = 0x0E582242 },
            { .reg = 0x00009888, .val = 0x00586700 },
            { .reg = 0x00009888, .val = 0x0258004F },
            { .reg = 0x00009888, .val = 0x0658C000 },
            { .reg = 0x00009888, .val = 0x0858C000 },
            { .reg = 0x00009888, .val = 0x0A58C000 },
            { .reg = 0x00009888, .val = 0x0C58C000 },
            { .reg = 0x00009888, .val = 0x045B6300 },
            { .reg = 0x00009888, .val = 0x105B0000 },
            { .reg = 0x00009888, .val = 0x005B4000 },
            { .reg = 0x00009888, .val = 0x0E5B4000 },
            { .reg = 0x00009888, .val = 0x1A5B0155 },
            { .reg = 0x00009888, .val = 0x025B4000 },
            { .reg = 0x00009888, .val = 0x0A5B0000 },
            { .reg = 0x00009888, .val = 0x0C5B4000 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAAA0 },
            { .reg = 0x00009888, .val = 0x101F02AA },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x0E384000 },
            { .reg = 0x00009888, .val = 0x16384000 },
            { .reg = 0x00009888, .val = 0x18381555 },
            { .reg = 0x00009888, .val = 0x02384000 },
            { .reg = 0x00009888, .val = 0x04384000 },
            { .reg = 0x00009888, .val = 0x0A384000 },
            { .reg = 0x00009888, .val = 0x0C384000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x0639A000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0C39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x02392000 },
            { .reg = 0x00009888, .val = 0x04398000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8AAAA0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x0B8A8000 },
            { .reg = 0x00009888, .val = 0x0D8A8000 },
            { .reg = 0x00009888, .val = 0x038B6300 },
            { .reg = 0x00009888, .val = 0x058B0062 },
            { .reg = 0x00009888, .val = 0x118B0000 },
            { .reg = 0x00009888, .val = 0x238B02A0 },
            { .reg = 0x00009888, .val = 0x258B5555 },
            { .reg = 0x00009888, .val = 0x278B0015 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x2185AAAA },
            { .reg = 0x00009888, .val = 0x2385002A },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830155 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0D834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x0784C000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0D84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x1780C000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1D80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D801000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800001 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800041 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      /* B-counter register program: attached unconditionally. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0xBA98BA98 },
         { .reg = 0x00002748, .val = 0xBA98BA98 },
         { .reg = 0x00002744, .val = 0x00003377 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFF2 },
         { .reg = 0x00002774, .val = 0x00007FF0 },
         { .reg = 0x00002778, .val = 0x0007FFE2 },
         { .reg = 0x0000277C, .val = 0x00007FF0 },
         { .reg = 0x00002780, .val = 0x0007FFC2 },
         { .reg = 0x00002784, .val = 0x00007FF0 },
         { .reg = 0x00002788, .val = 0x0007FF82 },
         { .reg = 0x0000278C, .val = 0x00007FF0 },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000BFEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000BFDF },
         { .reg = 0x000027A0, .val = 0x0007FFFA },
         { .reg = 0x000027A4, .val = 0x0000BFBF },
         { .reg = 0x000027A8, .val = 0x0007FFFA },
         { .reg = 0x000027AC, .val = 0x0000BF7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register program: attached unconditionally. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions follow.  Each one claims the next free slot of
       * query->counters and records its read callback, metadata and
       * offset.  UINT64 counters are spaced 8 apart, FLOAT counters 4 apart;
       * the order determines the offsets and must not be changed. */

      /* GPU-wide time and clock counters (offsets 0..16). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from perf rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__data_port_reads_coalescing__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Thread-dispatch counters, one per shader stage (offsets 24..64). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      /* Displayed as "FS" (fragment shader) but the symbol name and read
       * callback use the "Ps"/"ps" spelling. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      /* FLOAT counters start here: they use oa_counter_read_float instead of
       * oa_counter_read_uint64 and their offsets advance by 4 (72..100). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* Unlike its percent-valued neighbours, this one is a plain number
       * with a maximum of 2.0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      /* Back to UINT64 counters; offsets advance by 8 again (104..224). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      /* Subslice 0 data port counters (offsets 168..224).  Note that these
       * are registered regardless of subslice_mask, whereas the MUX program
       * above is only attached when bit 0 of the mask is set. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads32_b__read;
      counter->name = "EU to Data Port 0 Reads 32";
      counter->desc = "The subslice 0 EU data reads from Data Port with 32B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Reads32B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads64_b__read;
      counter->name = "EU to Data Port 0 Reads 64";
      counter->desc = "The subslice 0 EU data reads from Data Port with 64B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Reads64B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads128_b__read;
      counter->name = "EU to Data Port 0 Reads 128";
      counter->desc = "The subslice 0 EU data reads from Data Port with 128B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Reads128B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__eu_hdc0_reads256_b__read;
      counter->name = "EU to Data Port 0 Reads 256";
      counter->desc = "The subslice 0 EU data reads from Data Port with 256B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Reads256B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_data_reads__read;
      counter->name = "Data Port 0 to L3 Data Reads";
      counter->desc = "The subslice 0 Data Port data and constant reads from L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3DataReads";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_data_writes__read;
      counter->name = "Data Port 0 to L3 Data Writes";
      counter->desc = "The subslice 0 Data Port data writes to L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3DataWrites";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_reads__read;
      counter->name = "All Data Port 0 Reads from L3";
      counter->desc = "The subslice 0 Data Port reads from L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3Reads";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_reads_coalescing__hdc0_l3_writes__read;
      counter->name = "All Data Port 0 Writes to L3";
      counter->desc = "The subslice 0 Data Port writes to L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3Writes";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      /* The set ends with two FLOAT counters at offsets 232 and 236; for
       * both, the display name is identical to the symbol name. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_bytes_read_per_cache_line__read;
      counter->name = "EuBytesReadPerCacheLine";
      counter->desc = "Average EU bytes read per L3 cache line.";
      counter->symbol_name = "EuBytesReadPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_reads_coalescing__eu_data_reads_per_cache_line__read;
      counter->name = "EuDataReadsPerCacheLine";
      counter->desc = "Coalescing ratio of EU read requests to L3 cache lines.";
      counter->symbol_name = "EuDataReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_UTILIZATION;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      /* counter still points at the last counter registered above, so the
       * total data size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "Data Port Writes Coalescing" OA metric set (symbol
 * DataPortWritesCoalescing) with the given perf configuration.
 *
 * The function allocates a query description parented to `perf`, fills in
 * its fixed metadata (name, GUID, OA report format, accumulator offsets),
 * attaches the register programming tables, describes each of the 38
 * counters exposed by the set and finally inserts the query into
 * perf->oa_metrics_table keyed by its GUID.
 *
 * perf: configuration the query is attached to; perf->sys_vars.subslice_mask
 *       decides whether the MUX programming is installed.
 *
 * NOTE(review): this file is generated by gen_perf.py (see file header).
 * The counter display strings corrected below (96B / 128B / 192B /
 * 128B SIMD16 writes) were copy-paste duplicates of neighbouring counters;
 * the same correction must be mirrored in the generator's input so that it
 * survives regeneration.
 */
static void
bdw_register_data_port_writes_coalescing_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Data Port Writes Coalescing metric set";
   query->symbol_name = "DataPortWritesCoalescing";
   query->guid = "343ebc99-4a55-414c-8c17-d8e259cf5e20";
   /* Capacity must match the number of counters registered below (38). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: each section starts right after the
    * previous one (1 GPU time slot, 1 GPU clock slot, 36 A, 8 B, 8 C and
    * 2 perfcnt slots). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was allocated just above with rzalloc, so
    * data_size is presumably still zero here and this branch is taken on
    * every call -- confirm before relying on the guard. */
   if (!query->data_size) {
      /* The MUX configuration is only installed when subslice 0 is
       * present; otherwise mux_regs / n_mux_regs keep their initial
       * values. */
      if (perf->sys_vars.subslice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x103D0005 },
            { .reg = 0x00009888, .val = 0x143D0120 },
            { .reg = 0x00009888, .val = 0x163D2400 },
            { .reg = 0x00009888, .val = 0x1058022F },
            { .reg = 0x00009888, .val = 0x105B0000 },
            { .reg = 0x00009888, .val = 0x198B0003 },
            { .reg = 0x00009888, .val = 0x005CC000 },
            { .reg = 0x00009888, .val = 0x065CC000 },
            { .reg = 0x00009888, .val = 0x085CC000 },
            { .reg = 0x00009888, .val = 0x0A5CC000 },
            { .reg = 0x00009888, .val = 0x0E5CC000 },
            { .reg = 0x00009888, .val = 0x025C4000 },
            { .reg = 0x00009888, .val = 0x045C8000 },
            { .reg = 0x00009888, .val = 0x003D0000 },
            { .reg = 0x00009888, .val = 0x063D0094 },
            { .reg = 0x00009888, .val = 0x083D0182 },
            { .reg = 0x00009888, .val = 0x0A3D1814 },
            { .reg = 0x00009888, .val = 0x0E3D0000 },
            { .reg = 0x00009888, .val = 0x183D0000 },
            { .reg = 0x00009888, .val = 0x1A3D0000 },
            { .reg = 0x00009888, .val = 0x0C3D0000 },
            { .reg = 0x00009888, .val = 0x0E582242 },
            { .reg = 0x00009888, .val = 0x00586700 },
            { .reg = 0x00009888, .val = 0x0258004F },
            { .reg = 0x00009888, .val = 0x0658C000 },
            { .reg = 0x00009888, .val = 0x0858C000 },
            { .reg = 0x00009888, .val = 0x0A58C000 },
            { .reg = 0x00009888, .val = 0x045B6A80 },
            { .reg = 0x00009888, .val = 0x005B4000 },
            { .reg = 0x00009888, .val = 0x0E5B4000 },
            { .reg = 0x00009888, .val = 0x185B5400 },
            { .reg = 0x00009888, .val = 0x1A5B0141 },
            { .reg = 0x00009888, .val = 0x025B4000 },
            { .reg = 0x00009888, .val = 0x0A5B0000 },
            { .reg = 0x00009888, .val = 0x0C5B4000 },
            { .reg = 0x00009888, .val = 0x0C1FA800 },
            { .reg = 0x00009888, .val = 0x0E1FAAA0 },
            { .reg = 0x00009888, .val = 0x101F0282 },
            { .reg = 0x00009888, .val = 0x00384000 },
            { .reg = 0x00009888, .val = 0x0E384000 },
            { .reg = 0x00009888, .val = 0x16384000 },
            { .reg = 0x00009888, .val = 0x18381415 },
            { .reg = 0x00009888, .val = 0x02384000 },
            { .reg = 0x00009888, .val = 0x04384000 },
            { .reg = 0x00009888, .val = 0x0A384000 },
            { .reg = 0x00009888, .val = 0x0C384000 },
            { .reg = 0x00009888, .val = 0x0039A000 },
            { .reg = 0x00009888, .val = 0x0639A000 },
            { .reg = 0x00009888, .val = 0x0839A000 },
            { .reg = 0x00009888, .val = 0x0A39A000 },
            { .reg = 0x00009888, .val = 0x0E39A000 },
            { .reg = 0x00009888, .val = 0x02392000 },
            { .reg = 0x00009888, .val = 0x04398000 },
            { .reg = 0x00009888, .val = 0x018A8000 },
            { .reg = 0x00009888, .val = 0x0F8A8000 },
            { .reg = 0x00009888, .val = 0x198A8000 },
            { .reg = 0x00009888, .val = 0x1B8A82A0 },
            { .reg = 0x00009888, .val = 0x1D8A0002 },
            { .reg = 0x00009888, .val = 0x038A8000 },
            { .reg = 0x00009888, .val = 0x058A8000 },
            { .reg = 0x00009888, .val = 0x0B8A8000 },
            { .reg = 0x00009888, .val = 0x0D8A8000 },
            { .reg = 0x00009888, .val = 0x038B6300 },
            { .reg = 0x00009888, .val = 0x058B0062 },
            { .reg = 0x00009888, .val = 0x118B0000 },
            { .reg = 0x00009888, .val = 0x238B02A0 },
            { .reg = 0x00009888, .val = 0x258B1555 },
            { .reg = 0x00009888, .val = 0x278B0014 },
            { .reg = 0x00009888, .val = 0x1F85AA80 },
            { .reg = 0x00009888, .val = 0x21852AAA },
            { .reg = 0x00009888, .val = 0x23850028 },
            { .reg = 0x00009888, .val = 0x01834000 },
            { .reg = 0x00009888, .val = 0x0F834000 },
            { .reg = 0x00009888, .val = 0x19835400 },
            { .reg = 0x00009888, .val = 0x1B830141 },
            { .reg = 0x00009888, .val = 0x03834000 },
            { .reg = 0x00009888, .val = 0x05834000 },
            { .reg = 0x00009888, .val = 0x07834000 },
            { .reg = 0x00009888, .val = 0x09834000 },
            { .reg = 0x00009888, .val = 0x0B834000 },
            { .reg = 0x00009888, .val = 0x0D834000 },
            { .reg = 0x00009888, .val = 0x0184C000 },
            { .reg = 0x00009888, .val = 0x0784C000 },
            { .reg = 0x00009888, .val = 0x0984C000 },
            { .reg = 0x00009888, .val = 0x0B84C000 },
            { .reg = 0x00009888, .val = 0x0F84C000 },
            { .reg = 0x00009888, .val = 0x0384C000 },
            { .reg = 0x00009888, .val = 0x0584C000 },
            { .reg = 0x00009888, .val = 0x1180C000 },
            { .reg = 0x00009888, .val = 0x1780C000 },
            { .reg = 0x00009888, .val = 0x1980C000 },
            { .reg = 0x00009888, .val = 0x1B80C000 },
            { .reg = 0x00009888, .val = 0x1F80C000 },
            { .reg = 0x00009888, .val = 0x1380C000 },
            { .reg = 0x00009888, .val = 0x1580C000 },
            { .reg = 0x00000D24, .val = 0x00000000 },
            { .reg = 0x00009888, .val = 0x4D801000 },
            { .reg = 0x00009888, .val = 0x3D800000 },
            { .reg = 0x00009888, .val = 0x4F800001 },
            { .reg = 0x00009888, .val = 0x43800000 },
            { .reg = 0x00009888, .val = 0x51800000 },
            { .reg = 0x00009888, .val = 0x45800000 },
            { .reg = 0x00009888, .val = 0x21800000 },
            { .reg = 0x00009888, .val = 0x31800000 },
            { .reg = 0x00009888, .val = 0x53800000 },
            { .reg = 0x00009888, .val = 0x47800420 },
            { .reg = 0x00009888, .val = 0x3F800421 },
            { .reg = 0x00009888, .val = 0x41800041 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      /* Boolean (B) counter programming, installed unconditionally. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0xBA98BA98 },
         { .reg = 0x00002748, .val = 0xBA98BA98 },
         { .reg = 0x00002744, .val = 0x00003377 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FF72 },
         { .reg = 0x00002774, .val = 0x0000BFD0 },
         { .reg = 0x00002778, .val = 0x0007FF62 },
         { .reg = 0x0000277C, .val = 0x0000BFD0 },
         { .reg = 0x00002780, .val = 0x0007FF42 },
         { .reg = 0x00002784, .val = 0x0000BFD0 },
         { .reg = 0x00002788, .val = 0x0007FF02 },
         { .reg = 0x0000278C, .val = 0x0000BFD0 },
         { .reg = 0x00002790, .val = 0x0005FFF2 },
         { .reg = 0x00002794, .val = 0x0000BFD0 },
         { .reg = 0x00002798, .val = 0x0005FFE2 },
         { .reg = 0x0000279C, .val = 0x0000BFD0 },
         { .reg = 0x000027A0, .val = 0x0005FFC2 },
         { .reg = 0x000027A4, .val = 0x0000BFD0 },
         { .reg = 0x000027A8, .val = 0x0005FF82 },
         { .reg = 0x000027AC, .val = 0x0000BFD0 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming, installed unconditionally. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions.  Each entry takes the next slot of
       * query->counters; `offset` is the byte position of the counter's
       * value in the query result (uint64 counters advance it by 8,
       * float counters by 4). */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__data_port_writes_coalescing__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes32_b__read;
      counter->name = "EU to Data Port 0 Writes 32B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 32B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes32B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes64_b__read;
      counter->name = "EU to Data Port 0 Writes 64B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 64B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes64B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      /* Name and description previously duplicated the 64B counter; they
       * now follow the 96B symbol name / read callback.
       * NOTE(review): confirm 96B against the hardware metric definition. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes96_b__read;
      counter->name = "EU to Data Port 0 Writes 96B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 96B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes96B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      /* Name completed with the missing "B" unit suffix so it is distinct
       * from the 192B counter below. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes128_b__read;
      counter->name = "EU to Data Port 0 Writes 128B";
      counter->desc = "The subslice 0 EU data writes to Data Port with 128B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes128B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      /* Name previously duplicated the 128B counter; the description and
       * symbol name both say 192B. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes192_b__read;
      counter->name = "EU to Data Port 0 Writes 192B";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 192B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes192B";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      /* Name previously duplicated the 64B counter; the description and
       * symbol name identify this as the SIMD16 128B variant. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes128_b_simd16__read;
      counter->name = "EU to Data Port 0 Writes 128B SIMD16";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 128B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes128BSimd16";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__eu_hdc0_writes256_b_simd16__read;
      counter->name = "EU to Data Port 0 Writes 256B";
      counter->desc = "The subslice 0 EU data simd16 writes to Data Port with 256B per message. Unit: messages.";
      counter->symbol_name = "EuHdc0Writes256BSimd16";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_data_reads__read;
      counter->name = "Data Port 0 to L3 Data Reads";
      counter->desc = "The subslice 0 Data Port data and constant reads from L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3DataReads";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_data_writes__read;
      counter->name = "Data Port 0 to L3 Data Writes";
      counter->desc = "The subslice 0 Data Port data writes to L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3DataWrites";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_reads__read;
      counter->name = "All Data Port 0 Reads from L3";
      counter->desc = "The subslice 0 Data Port reads from L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3Reads";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__data_port_writes_coalescing__hdc0_l3_writes__read;
      counter->name = "All Data Port 0 Writes to L3";
      counter->desc = "The subslice 0 Data Port writes to L3 cache. Unit: messages.";
      counter->symbol_name = "Hdc0L3Writes";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_bytes_written_per_cache_line__read;
      counter->name = "EuBytesWrittenPerCacheLine";
      counter->desc = "Average EU bytes written per L3 cache line.";
      counter->symbol_name = "EuBytesWrittenPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__data_port_writes_coalescing__eu_data_writes_per_cache_line__read;
      counter->name = "EuDataWritesPerCacheLine";
      counter->desc = "Coalescing ratio of EU write requests to L3 cache lines.";
      counter->symbol_name = "EuDataWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_UTILIZATION;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      /* `counter` still points at the last entry, so the total result size
       * is its offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "78490af2-10fa-430b-ae3c-94ec04d5214e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x105C0232 },
         { .reg = 0x00009888, .val = 0x10580232 },
         { .reg = 0x00009888, .val = 0x10380232 },
         { .reg = 0x00009888, .val = 0x10DC0232 },
         { .reg = 0x00009888, .val = 0x10D80232 },
         { .reg = 0x00009888, .val = 0x10B80232 },
         { .reg = 0x00009888, .val = 0x118E4400 },
         { .reg = 0x00009888, .val = 0x025C6080 },
         { .reg = 0x00009888, .val = 0x045C004B },
         { .reg = 0x00009888, .val = 0x005C8000 },
         { .reg = 0x00009888, .val = 0x00582080 },
         { .reg = 0x00009888, .val = 0x0258004B },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x04386080 },
         { .reg = 0x00009888, .val = 0x0638404B },
         { .reg = 0x00009888, .val = 0x02384000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A380000 },
         { .reg = 0x00009888, .val = 0x0C380000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0CDC25C1 },
         { .reg = 0x00009888, .val = 0x0ADCC000 },
         { .reg = 0x00009888, .val = 0x0AD825C1 },
         { .reg = 0x00009888, .val = 0x18DB4000 },
         { .reg = 0x00009888, .val = 0x1ADB0001 },
         { .reg = 0x00009888, .val = 0x0E9F8000 },
         { .reg = 0x00009888, .val = 0x109F02AA },
         { .reg = 0x00009888, .val = 0x0EB825C1 },
         { .reg = 0x00009888, .val = 0x18B80154 },
         { .reg = 0x00009888, .val = 0x0AB9A000 },
         { .reg = 0x00009888, .val = 0x0CB9A000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x0D88C000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258BAA05 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x198C5400 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x098DC000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x098E05C0 },
         { .reg = 0x00009888, .val = 0x058E0000 },
         { .reg = 0x00009888, .val = 0x198F0020 },
         { .reg = 0x00009888, .val = 0x2185AA0A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x19835000 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x19808000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x51800040 },
         { .reg = 0x00009888, .val = 0x43800400 },
         { .reg = 0x00009888, .val = 0x45800800 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800C62 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F801042 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x418014A4 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FFF7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader12AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader11AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader10AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "L3_1" OA metric set (bdw variant) with @perf.
 *
 * Allocates a query descriptor as a ralloc child of @perf, fills in the
 * register programming tables (mux / B-counter / flex) and the list of
 * counters exposed by this metric set, then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * Some counters are only registered when the matching bits are set in
 * perf->sys_vars (slice_mask, query_mode), so the final n_counters and
 * data_size depend on the system variables in @perf.
 */
static void
bdw_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   /* NOTE(review): the result of rzalloc() is used unchecked; presumably
    * allocation failure is handled (or ignored) by project policy. */
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "c0abdd97-3b13-4cad-814c-bd178804e02c";
   /* 40 slots: this function registers 35 unconditional counters plus at
    * most 5 conditional ones below, so the array is never overrun. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Sections are laid out back to back: 1 slot for GPU time, 1 for GPU
    * clock, 36 for A counters, 8 for B, 8 for C and 2 for perfcnt.
    * Presumably these sizes mirror the A32+A4 / B8 / C8 layout named by
    * the OA format above -- TODO confirm against the format definition. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Starts at the first slot; re-pointed at each newly registered counter
    * below, so after registration it refers to the last counter added. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query was allocated just above and data_size has not
    * been assigned yet, so this branch is presumably always taken (assumes
    * rzalloc() zero-fills -- confirm). */
   if (!query->data_size) {
      /* Register/value pairs handed to the query config as the mux
       * programming. The values are opaque generated data; the table is
       * static const, so the query only stores a pointer to it. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x10BF03DA },
         { .reg = 0x00009888, .val = 0x14BF0001 },
         { .reg = 0x00009888, .val = 0x12980340 },
         { .reg = 0x00009888, .val = 0x12990340 },
         { .reg = 0x00009888, .val = 0x0CBF1187 },
         { .reg = 0x00009888, .val = 0x0EBF1205 },
         { .reg = 0x00009888, .val = 0x00BF0500 },
         { .reg = 0x00009888, .val = 0x02BF042B },
         { .reg = 0x00009888, .val = 0x04BF002C },
         { .reg = 0x00009888, .val = 0x0CDAC000 },
         { .reg = 0x00009888, .val = 0x0EDAC000 },
         { .reg = 0x00009888, .val = 0x00DA8000 },
         { .reg = 0x00009888, .val = 0x02DAC000 },
         { .reg = 0x00009888, .val = 0x04DA4000 },
         { .reg = 0x00009888, .val = 0x04983400 },
         { .reg = 0x00009888, .val = 0x10980000 },
         { .reg = 0x00009888, .val = 0x06990034 },
         { .reg = 0x00009888, .val = 0x10990000 },
         { .reg = 0x00009888, .val = 0x0C9DC000 },
         { .reg = 0x00009888, .val = 0x0E9DC000 },
         { .reg = 0x00009888, .val = 0x009D8000 },
         { .reg = 0x00009888, .val = 0x029DC000 },
         { .reg = 0x00009888, .val = 0x049D4000 },
         { .reg = 0x00009888, .val = 0x109F02A8 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00BA },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x0CB95000 },
         { .reg = 0x00009888, .val = 0x0EB95000 },
         { .reg = 0x00009888, .val = 0x00B94000 },
         { .reg = 0x00009888, .val = 0x02B95000 },
         { .reg = 0x00009888, .val = 0x04B91000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x0CBA4000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x258B800A },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x198C4000 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x47800000 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800060 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs stored as the B-counter programming. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs stored as the flex register programming. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter registration. Each entry claims the next slot in
       * query->counters (bumping n_counters) and records a read callback,
       * display strings, type/units metadata, a raw maximum and a fixed
       * offset. Offsets are hard-coded constants that advance by 8 after a
       * UINT64 counter and by 4 after a FLOAT counter. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter here whose raw_max is computed at registration
       * time from @perf rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* FLOAT counter at offset 24; the next counter is placed at 32, so
       * 4 bytes after this one are left unused. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Ten consecutive FLOAT percentage counters follow (offsets 80..116,
       * 4 apart), each with raw_max 100.0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* UINT64 counters from here through ShaderBarriers (offsets
       * 120..232, 8 apart). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* The next four counters are only registered when bit 1 (0x2) of
       * sys_vars.slice_mask is set. Their offsets are fixed constants, so
       * skipping them does not shift the offset of any later counter. */
      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank0_stalled__read;
         counter->name = "Slice1 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank1_stalled__read;
         counter->name = "Slice1 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank1_active__read;
         counter->name = "Slice1 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L31Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__l31_bank0_active__read;
         counter->name = "Slice1 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L31Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      /* Only registered when sys_vars.query_mode is non-zero. The leading
       * "true &&" has no effect; presumably it is left over from the
       * generator's availability expression. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      /* counter now points at the last counter actually registered, so the
       * result size is that counter's offset plus its size. The value
       * therefore depends on which conditional counters were added. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Metric set L3_2 :: query registration
 *
 * Allocates a new intel_perf_query_info with rzalloc() (passing perf as the
 * allocation context), fills in its identity, accumulator offsets, register
 * programming tables and counter descriptions, and finally inserts it into
 * perf->oa_metrics_table keyed by the query GUID string.
 *
 * perf: configuration that receives the query.  perf->sys_vars.slice_mask
 *       and perf->sys_vars.query_mode decide which of the optional counters
 *       at the end of the list are registered.
 *
 * At most 40 counters are registered (35 unconditional plus 5 conditional),
 * which matches the size of the counters array allocated below.
 *
 * NOTE(review): neither allocation result is checked before use --
 * presumably allocation failure is handled (or treated as fatal) elsewhere;
 * confirm.
 *
 * This code is emitted by gen_perf.py (see the file header), so lasting
 * changes belong in the generator rather than here.
 */
static void
bdw_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "ad665281-a7cf-483a-bd10-0e07c43f61c7";
   /* Room for every counter that can be registered below (35 + 5). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* These are slot indices (the read callbacks index results->accumulator[]
    * with them): GPU time (1 slot), GPU clock (1 slot), then 36 A, 8 B and
    * 8 C slots -- presumably mirroring the A32u40_A4u32_B8_C8 OA format
    * selected above -- followed by 2 perfcnt slots before rpstat. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Starts at the first array element; reassigned for every counter
    * registered below and read once more after the last registration to
    * compute data_size. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): in this function the query is not a global; it was
    * allocated by rzalloc() a few lines up, so data_size is presumably still
    * zero here and this branch is always taken -- confirm. */
   if (!query->data_size) {
      /* Mux register programming as a list of { register, value } pairs.
       * Most entries target the same register (0x9888) with different
       * values, so the order of the list presumably matters to whoever
       * writes them to the hardware -- confirm before reordering. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x103F03DA },
         { .reg = 0x00009888, .val = 0x143F0001 },
         { .reg = 0x00009888, .val = 0x12180340 },
         { .reg = 0x00009888, .val = 0x12190340 },
         { .reg = 0x00009888, .val = 0x0C3F1187 },
         { .reg = 0x00009888, .val = 0x0E3F1205 },
         { .reg = 0x00009888, .val = 0x003F0500 },
         { .reg = 0x00009888, .val = 0x023F042B },
         { .reg = 0x00009888, .val = 0x043F002C },
         { .reg = 0x00009888, .val = 0x0C5AC000 },
         { .reg = 0x00009888, .val = 0x0E5AC000 },
         { .reg = 0x00009888, .val = 0x005A8000 },
         { .reg = 0x00009888, .val = 0x025AC000 },
         { .reg = 0x00009888, .val = 0x045A4000 },
         { .reg = 0x00009888, .val = 0x04183400 },
         { .reg = 0x00009888, .val = 0x10180000 },
         { .reg = 0x00009888, .val = 0x06190034 },
         { .reg = 0x00009888, .val = 0x10190000 },
         { .reg = 0x00009888, .val = 0x0C1DC000 },
         { .reg = 0x00009888, .val = 0x0E1DC000 },
         { .reg = 0x00009888, .val = 0x001D8000 },
         { .reg = 0x00009888, .val = 0x021DC000 },
         { .reg = 0x00009888, .val = 0x041D4000 },
         { .reg = 0x00009888, .val = 0x101F02A8 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00BA },
         { .reg = 0x00009888, .val = 0x0C388000 },
         { .reg = 0x00009888, .val = 0x0C395000 },
         { .reg = 0x00009888, .val = 0x0E395000 },
         { .reg = 0x00009888, .val = 0x00394000 },
         { .reg = 0x00009888, .val = 0x02395000 },
         { .reg = 0x00009888, .val = 0x04391000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0C3A4000 },
         { .reg = 0x00009888, .val = 0x1B8AA800 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258B4005 },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x47800000 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800060 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming, same { register, value } layout. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming, same { register, value } layout. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions.  Each registration claims the next free slot
       * in query->counters (bumping n_counters) and records the read
       * callback, the descriptive strings, the type/data type/units, raw_max
       * and the counter's offset.
       *
       * Offsets advance by 8 after a UINT64 counter and by 4 after a FLOAT
       * counter, with one exception: GpuBusy (FLOAT at 24) is followed by a
       * counter at 32, presumably so the UINT64 that follows stays 8-byte
       * aligned. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed from perf at
       * registration time; every other raw_max below is a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-stage thread dispatch counts (UINT64, offsets 32..72). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Percentage counters (FLOAT, raw_max 100.0, offsets 80..116). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Pixel, sample, texel, byte and message totals (UINT64, offsets
       * 120..232). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* The next four counters are registered only when bit 0 of
       * perf->sys_vars.slice_mask is set.  Their offsets are literals, so
       * the counter after them keeps offset 256 whether or not they are
       * registered. */
      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      /* Registered only when perf->sys_vars.query_mode is non-zero; the
       * leading `true &&` does not change the condition. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      /* counter still points at the last counter registered above (which
       * one that is depends on the two conditions), so data_size is that
       * counter's offset plus whatever
       * intel_perf_query_counter_get_size() reports for it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query reachable through the table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Builds the description of the bdw "L3_3" OA metric set and registers it in
 * perf->oa_metrics_table under the query's GUID.
 *
 * The query is rzalloc'ed with perf as its ralloc context and its 40-entry
 * counter array with the query as context.  Thirty-five counters are always
 * added; the four slice L3 bank3 counters depend on bits 0x1 / 0x2 of
 * perf->sys_vars.slice_mask and "SQ is full" depends on
 * perf->sys_vars.query_mode.  Each counter carries a fixed offset (apparently
 * in bytes: uint64 counters are 8 apart, float counters 4 apart), so leaving
 * out a conditional counter does not move any of the others.
 *
 * NOTE(review): neither allocation result is checked before use.
 */
static void
bdw_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs published as query->config.mux_regs. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x121B0340 },
      { .reg = 0x00009888, .val = 0x103F0274 },
      { .reg = 0x00009888, .val = 0x123F0000 },
      { .reg = 0x00009888, .val = 0x129B0340 },
      { .reg = 0x00009888, .val = 0x10BF0274 },
      { .reg = 0x00009888, .val = 0x12BF0000 },
      { .reg = 0x00009888, .val = 0x041B3400 },
      { .reg = 0x00009888, .val = 0x101B0000 },
      { .reg = 0x00009888, .val = 0x045C8000 },
      { .reg = 0x00009888, .val = 0x0A3D4000 },
      { .reg = 0x00009888, .val = 0x003F0080 },
      { .reg = 0x00009888, .val = 0x023F0793 },
      { .reg = 0x00009888, .val = 0x043F0014 },
      { .reg = 0x00009888, .val = 0x04588000 },
      { .reg = 0x00009888, .val = 0x005A8000 },
      { .reg = 0x00009888, .val = 0x025AC000 },
      { .reg = 0x00009888, .val = 0x045A4000 },
      { .reg = 0x00009888, .val = 0x0A5B4000 },
      { .reg = 0x00009888, .val = 0x001D8000 },
      { .reg = 0x00009888, .val = 0x021DC000 },
      { .reg = 0x00009888, .val = 0x041D4000 },
      { .reg = 0x00009888, .val = 0x0C1FA000 },
      { .reg = 0x00009888, .val = 0x0E1F002A },
      { .reg = 0x00009888, .val = 0x0A384000 },
      { .reg = 0x00009888, .val = 0x00394000 },
      { .reg = 0x00009888, .val = 0x02395000 },
      { .reg = 0x00009888, .val = 0x04399000 },
      { .reg = 0x00009888, .val = 0x069B0034 },
      { .reg = 0x00009888, .val = 0x109B0000 },
      { .reg = 0x00009888, .val = 0x06DC4000 },
      { .reg = 0x00009888, .val = 0x0CBD4000 },
      { .reg = 0x00009888, .val = 0x0CBF0981 },
      { .reg = 0x00009888, .val = 0x0EBF0A0F },
      { .reg = 0x00009888, .val = 0x06D84000 },
      { .reg = 0x00009888, .val = 0x0CDAC000 },
      { .reg = 0x00009888, .val = 0x0EDAC000 },
      { .reg = 0x00009888, .val = 0x0CDB4000 },
      { .reg = 0x00009888, .val = 0x0C9DC000 },
      { .reg = 0x00009888, .val = 0x0E9DC000 },
      { .reg = 0x00009888, .val = 0x109F02A8 },
      { .reg = 0x00009888, .val = 0x0E9F0080 },
      { .reg = 0x00009888, .val = 0x0CB84000 },
      { .reg = 0x00009888, .val = 0x0CB95000 },
      { .reg = 0x00009888, .val = 0x0EB95000 },
      { .reg = 0x00009888, .val = 0x06B92000 },
      { .reg = 0x00009888, .val = 0x0F88000F },
      { .reg = 0x00009888, .val = 0x0D880400 },
      { .reg = 0x00009888, .val = 0x038A8000 },
      { .reg = 0x00009888, .val = 0x058A8000 },
      { .reg = 0x00009888, .val = 0x078A8000 },
      { .reg = 0x00009888, .val = 0x098A8000 },
      { .reg = 0x00009888, .val = 0x0B8A8000 },
      { .reg = 0x00009888, .val = 0x258B8009 },
      { .reg = 0x00009888, .val = 0x278B002A },
      { .reg = 0x00009888, .val = 0x238B2A80 },
      { .reg = 0x00009888, .val = 0x198C4000 },
      { .reg = 0x00009888, .val = 0x1B8C0015 },
      { .reg = 0x00009888, .val = 0x0D8C4000 },
      { .reg = 0x00009888, .val = 0x0D8DA000 },
      { .reg = 0x00009888, .val = 0x0F8DA000 },
      { .reg = 0x00009888, .val = 0x078D2000 },
      { .reg = 0x00009888, .val = 0x2185800A },
      { .reg = 0x00009888, .val = 0x2385002A },
      { .reg = 0x00009888, .val = 0x1F85AA00 },
      { .reg = 0x00009888, .val = 0x1B830154 },
      { .reg = 0x00009888, .val = 0x03834000 },
      { .reg = 0x00009888, .val = 0x05834000 },
      { .reg = 0x00009888, .val = 0x07834000 },
      { .reg = 0x00009888, .val = 0x09834000 },
      { .reg = 0x00009888, .val = 0x0B834000 },
      { .reg = 0x00009888, .val = 0x0D834000 },
      { .reg = 0x00009888, .val = 0x0D84C000 },
      { .reg = 0x00009888, .val = 0x0F84C000 },
      { .reg = 0x00009888, .val = 0x01848000 },
      { .reg = 0x00009888, .val = 0x0384C000 },
      { .reg = 0x00009888, .val = 0x0584C000 },
      { .reg = 0x00009888, .val = 0x07844000 },
      { .reg = 0x00009888, .val = 0x1D80C000 },
      { .reg = 0x00009888, .val = 0x1F80C000 },
      { .reg = 0x00009888, .val = 0x11808000 },
      { .reg = 0x00009888, .val = 0x1380C000 },
      { .reg = 0x00009888, .val = 0x1580C000 },
      { .reg = 0x00009888, .val = 0x17804000 },
      { .reg = 0x00009888, .val = 0x53800000 },
      { .reg = 0x00009888, .val = 0x45800C00 },
      { .reg = 0x00009888, .val = 0x47800C63 },
      { .reg = 0x00009888, .val = 0x21800000 },
      { .reg = 0x00009888, .val = 0x31800000 },
      { .reg = 0x00009888, .val = 0x4D800000 },
      { .reg = 0x00009888, .val = 0x3F8014A5 },
      { .reg = 0x00009888, .val = 0x4F800000 },
      { .reg = 0x00009888, .val = 0x41800045 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x000091BC, .val = 0xE0500000 },
   };

   /* Register/value pairs published as query->config.b_counter_regs. */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002770, .val = 0x00100070 },
      { .reg = 0x00002774, .val = 0x0000FFF1 },
      { .reg = 0x00002778, .val = 0x00014002 },
      { .reg = 0x0000277C, .val = 0x0000C3FF },
      { .reg = 0x00002780, .val = 0x00010002 },
      { .reg = 0x00002784, .val = 0x0000C7FF },
      { .reg = 0x00002788, .val = 0x00004002 },
      { .reg = 0x0000278C, .val = 0x0000D3FF },
      { .reg = 0x00002790, .val = 0x00100700 },
      { .reg = 0x00002794, .val = 0x0000FF1F },
      { .reg = 0x00002798, .val = 0x00001402 },
      { .reg = 0x0000279C, .val = 0x0000FC3F },
      { .reg = 0x000027A0, .val = 0x00001002 },
      { .reg = 0x000027A4, .val = 0x0000FC7F },
      { .reg = 0x000027A8, .val = 0x00000402 },
      { .reg = 0x000027AC, .val = 0x0000FD3F },
   };

   /* Register/value pairs published as query->config.flex_regs. */
   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "930a15aa-4300-4fce-a9ba-edb0b9e880be";

   /* Room for every counter this function can add (35 fixed + 5 optional). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Start index of each counter group inside the accumulation buffer; every
    * group begins where the previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Tracks the most recently added counter; data_size is derived from it. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Claim the next slot of query->counters, leave `counter` pointing at it
    * and fill in its description.  reader_ names the reader member to set;
    * the two wrappers below pair it with the matching data type. */
#define BDW_L3_3_ADD_COUNTER(reader_, data_type_, fn_, name_, symbol_, \
                             category_, type_, units_, raw_max_, offset_, \
                             desc_) \
   do { \
      counter = &query->counters[query->n_counters++]; \
      counter->reader_ = (fn_); \
      counter->name = (name_); \
      counter->desc = (desc_); \
      counter->symbol_name = (symbol_); \
      counter->category = (category_); \
      counter->type = (type_); \
      counter->data_type = (data_type_); \
      counter->units = (units_); \
      counter->raw_max = (raw_max_); \
      counter->offset = (offset_); \
   } while (0)
#define BDW_L3_3_ADD_UINT64(...) \
   BDW_L3_3_ADD_COUNTER(oa_counter_read_uint64, \
                        INTEL_PERF_COUNTER_DATA_TYPE_UINT64, __VA_ARGS__)
#define BDW_L3_3_ADD_FLOAT(...) \
   BDW_L3_3_ADD_COUNTER(oa_counter_read_float, \
                        INTEL_PERF_COUNTER_DATA_TYPE_FLOAT, __VA_ARGS__)

   /* data_size is only written at the end of this block, so on the freshly
    * rzalloc'ed query it is expected to still be zero at this point. */
   if (!query->data_size) {
      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      query->config.flex_regs = flex_prog;
      query->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* --- GPU --- */
      BDW_L3_3_ADD_UINT64(bdw__l3_3__gpu_time__read,
                          "GPU Time Elapsed", "GpuTime", "GPU",
                          INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_NS,
                          0 /* undefined */, 0,
                          "Time elapsed on the GPU during the measurement. Unit: ns.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__gpu_core_clocks__read,
                          "GPU Core Clocks", "GpuCoreClocks", "GPU",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_CYCLES,
                          0 /* undefined */, 8,
                          "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__avg_gpu_core_frequency__read,
                          "AVG GPU Core Frequency", "AvgGpuCoreFrequency", "GPU",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_HZ,
                          bdw__l3_3__avg_gpu_core_frequency__max(perf), 16,
                          "Average GPU Core Frequency in the measurement. Unit: Hz.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__gpu_busy__read,
                         "GPU Busy", "GpuBusy", "GPU",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 24,
                         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.");

      /* --- Threads dispatched per shader stage --- */
      BDW_L3_3_ADD_UINT64(bdw__l3_3__vs_threads__read,
                          "VS Threads Dispatched", "VsThreads", "EU Array/Vertex Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 32,
                          "The total number of vertex shader hardware threads dispatched. Unit: threads.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__hs_threads__read,
                          "HS Threads Dispatched", "HsThreads", "EU Array/Hull Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 40,
                          "The total number of hull shader hardware threads dispatched. Unit: threads.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__ds_threads__read,
                          "DS Threads Dispatched", "DsThreads", "EU Array/Domain Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 48,
                          "The total number of domain shader hardware threads dispatched. Unit: threads.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__gs_threads__read,
                          "GS Threads Dispatched", "GsThreads", "EU Array/Geometry Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 56,
                          "The total number of geometry shader hardware threads dispatched. Unit: threads.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__ps_threads__read,
                          "FS Threads Dispatched", "PsThreads", "EU Array/Fragment Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 64,
                          "The total number of fragment shader hardware threads dispatched. Unit: threads.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__cs_threads__read,
                          "CS Threads Dispatched", "CsThreads", "EU Array/Compute Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                          0 /* undefined */, 72,
                          "The total number of compute shader hardware threads dispatched. Unit: threads.");

      /* --- EU array activity --- */
      BDW_L3_3_ADD_FLOAT(bdw__l3_3__eu_active__read,
                         "EU Active", "EuActive", "EU Array",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 80,
                         "The percentage of time in which the Execution Units were actively processing. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__eu_stall__read,
                         "EU Stall", "EuStall", "EU Array",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 84,
                         "The percentage of time in which the Execution Units were stalled. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__eu_fpu_both_active__read,
                         "EU Both FPU Pipes Active", "EuFpuBothActive", "EU Array/Pipes",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 88,
                         "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__vs_fpu0_active__read,
                         "VS FPU0 Pipe Active", "VsFpu0Active", "EU Array/Vertex Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 92,
                         "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__vs_fpu1_active__read,
                         "VS FPU1 Pipe Active", "VsFpu1Active", "EU Array/Vertex Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 96,
                         "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__vs_send_active__read,
                         "VS Send Pipe Active", "VsSendActive", "EU Array/Vertex Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 100,
                         "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__ps_fpu0_active__read,
                         "PS FPU0 Pipe Active", "PsFpu0Active", "EU Array/Pixel Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 104,
                         "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__ps_fpu1_active__read,
                         "PS FPU1 Pipe Active", "PsFpu1Active", "EU Array/Pixel Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 108,
                         "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__ps_send_active__read,
                         "PS Send Pipeline Active", "PsSendActive", "EU Array/Pixel Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 112,
                         "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_3_ADD_FLOAT(bdw__l3_3__ps_eu_both_fpu_active__read,
                         "FS Both FPU Active", "PsEuBothFpuActive", "3D Pipe/Fragment Shader",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                         100.0, 116,
                         "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.");

      /* --- 3D pipe pixel/sample counts --- */
      BDW_L3_3_ADD_UINT64(bdw__l3_3__rasterized_pixels__read,
                          "Rasterized Pixels", "RasterizedPixels", "3D Pipe/Rasterizer",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 120,
                          "The total number of rasterized pixels. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__hi_depth_test_fails__read,
                          "Early Hi-Depth Test Fails", "HiDepthTestFails", "3D Pipe/Rasterizer/Hi-Depth Test",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 128,
                          "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__early_depth_test_fails__read,
                          "Early Depth Test Fails", "EarlyDepthTestFails", "3D Pipe/Rasterizer/Early Depth Test",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 136,
                          "The total number of pixels dropped on early depth test. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__samples_killed_in_ps__read,
                          "Samples Killed in FS", "SamplesKilledInPs", "3D Pipe/Fragment Shader",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 144,
                          "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__pixels_failing_post_ps_tests__read,
                          "Pixels Failing Tests", "PixelsFailingPostPsTests", "3D Pipe/Output Merger",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 152,
                          "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__samples_written__read,
                          "Samples Written", "SamplesWritten", "3D Pipe/Output Merger",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 160,
                          "The total number of samples or pixels written to all render targets. Unit: pixels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__samples_blended__read,
                          "Samples Blended", "SamplesBlended", "3D Pipe/Output Merger",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                          0 /* undefined */, 168,
                          "The total number of blended samples or pixels written to all render targets. Unit: pixels.");

      /* --- Sampler --- */
      BDW_L3_3_ADD_UINT64(bdw__l3_3__sampler_texels__read,
                          "Sampler Texels", "SamplerTexels", "Sampler/Sampler Input",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
                          0 /* undefined */, 176,
                          "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__sampler_texel_misses__read,
                          "Sampler Texels Misses", "SamplerTexelMisses", "Sampler/Sampler Cache",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
                          0 /* undefined */, 184,
                          "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.");

      /* --- L3 / data port --- */
      BDW_L3_3_ADD_UINT64(bdw__l3_3__slm_bytes_read__read,
                          "SLM Bytes Read", "SlmBytesRead", "L3/Data Port/SLM",
                          INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                          0 /* unsupported (varies over time) */, 192,
                          "The total number of GPU memory bytes read from shared local memory. Unit: bytes.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__slm_bytes_written__read,
                          "SLM Bytes Written", "SlmBytesWritten", "L3/Data Port/SLM",
                          INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                          0 /* unsupported (varies over time) */, 200,
                          "The total number of GPU memory bytes written into shared local memory. Unit: bytes.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__shader_memory_accesses__read,
                          "Shader Memory Accesses", "ShaderMemoryAccesses", "L3/Data Port",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                          0 /* undefined */, 208,
                          "The total number of shader memory accesses to L3. Unit: messages.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__shader_atomics__read,
                          "Shader Atomic Memory Accesses", "ShaderAtomics", "L3/Data Port/Atomics",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                          0 /* undefined */, 216,
                          "The total number of shader atomic memory accesses. Unit: messages.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__l3_shader_throughput__read,
                          "L3 Shader Throughput", "L3ShaderThroughput", "L3/Data Port",
                          INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                          0 /* unsupported (varies over time) */, 224,
                          "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.");

      BDW_L3_3_ADD_UINT64(bdw__l3_3__shader_barriers__read,
                          "Shader Barrier Messages", "ShaderBarriers", "EU Array/Barrier",
                          INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                          0 /* undefined */, 232,
                          "The total number of shader barrier messages. Unit: messages.");

      /* --- Optional counters.  The four slice checks are kept separate and
       * in this order because the order of registration decides each
       * counter's index in query->counters. --- */
      if (perf->sys_vars.slice_mask & 0x1) {
         BDW_L3_3_ADD_FLOAT(bdw__l3_3__l30_bank3_stalled__read,
                            "Slice0 L3 Bank3 Stalled", "L30Bank3Stalled", "GTI/L3",
                            INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                            100.0, 240,
                            "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.");
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         BDW_L3_3_ADD_FLOAT(bdw__l3_3__l31_bank3_stalled__read,
                            "Slice1 L3 Bank3 Stalled", "L31Bank3Stalled", "GTI/L3",
                            INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                            100.0, 244,
                            "The percentage of time in which slice1 L3 bank3 is stalled Unit: percent.");
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         BDW_L3_3_ADD_FLOAT(bdw__l3_3__l31_bank3_active__read,
                            "Slice1 L3 Bank3 Active", "L31Bank3Active", "GTI/L3",
                            INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                            100.0, 248,
                            "The percentage of time in which slice1 L3 bank3 is active Unit: percent.");
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         BDW_L3_3_ADD_FLOAT(bdw__l3_3__l30_bank3_active__read,
                            "Slice0 L3 Bank3 Active", "L30Bank3Active", "GTI/L3",
                            INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                            100.0, 252,
                            "The percentage of time in which slice0 L3 bank3 is active Unit: percent.");
      }

      if (perf->sys_vars.query_mode) {
         BDW_L3_3_ADD_FLOAT(bdw__l3_3__gt_request_queue_full__read,
                            "SQ is full", "GTRequestQueueFull", "GTI",
                            INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                            100.0, 256,
                            "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.");
      }

      /* The result buffer ends right after the last counter that was
       * actually registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

#undef BDW_L3_3_ADD_FLOAT
#undef BDW_L3_3_ADD_UINT64
#undef BDW_L3_3_ADD_COUNTER

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Block-local helpers for bdw_register_l3_4_counter_query().
 *
 * Each expansion claims the next free slot in query->counters, leaves the
 * local `counter` pointing at that slot and fills in every descriptive
 * field.  They deliberately capture the `query` and `counter` locals of the
 * enclosing function and are #undef'd right after it.
 */
#define BDW_L3_4_COUNTER(read_member_, data_type_, reader_, offset_,         \
                         symbol_, name_, category_, desc_, type_, units_,    \
                         raw_max_)                                           \
   do {                                                                      \
      counter = &query->counters[query->n_counters++];                       \
      counter->read_member_ = (reader_);                                     \
      counter->name = (name_);                                               \
      counter->desc = (desc_);                                               \
      counter->symbol_name = (symbol_);                                      \
      counter->category = (category_);                                       \
      counter->type = (type_);                                               \
      counter->data_type = (data_type_);                                     \
      counter->units = (units_);                                             \
      counter->raw_max = (raw_max_);                                         \
      counter->offset = (offset_);                                           \
   } while (0)

/* Counter whose value is produced by a uint64 read callback. */
#define BDW_L3_4_UINT64_COUNTER(reader_, offset_, symbol_, name_, category_, \
                                desc_, type_, units_, raw_max_)              \
   BDW_L3_4_COUNTER(oa_counter_read_uint64,                                  \
                    INTEL_PERF_COUNTER_DATA_TYPE_UINT64, reader_, offset_,   \
                    symbol_, name_, category_, desc_, type_, units_,         \
                    raw_max_)

/* Float counter reported as a raw percentage with a maximum of 100.0. */
#define BDW_L3_4_PERCENT_COUNTER(reader_, offset_, symbol_, name_,           \
                                 category_, desc_)                           \
   BDW_L3_4_COUNTER(oa_counter_read_float,                                   \
                    INTEL_PERF_COUNTER_DATA_TYPE_FLOAT, reader_, offset_,    \
                    symbol_, name_, category_, desc_,                        \
                    INTEL_PERF_COUNTER_TYPE_RAW,                             \
                    INTEL_PERF_COUNTER_UNITS_PERCENT, 100.0)

/*
 * Describe the "L3_4" OA metric set (register programming plus up to 40
 * counters) and insert it into perf->oa_metrics_table under its GUID.
 */
static void
bdw_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   /* Register programs attached to query->config below. */
   static const struct intel_perf_query_register_prog l3_4_mux_regs[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x121A0340 },
      { .reg = 0x00009888, .val = 0x103F0017 },
      { .reg = 0x00009888, .val = 0x123F0020 },
      { .reg = 0x00009888, .val = 0x129A0340 },
      { .reg = 0x00009888, .val = 0x10BF0017 },
      { .reg = 0x00009888, .val = 0x12BF0020 },
      { .reg = 0x00009888, .val = 0x041A3400 },
      { .reg = 0x00009888, .val = 0x101A0000 },
      { .reg = 0x00009888, .val = 0x043B8000 },
      { .reg = 0x00009888, .val = 0x0A3E0010 },
      { .reg = 0x00009888, .val = 0x003F0200 },
      { .reg = 0x00009888, .val = 0x023F0113 },
      { .reg = 0x00009888, .val = 0x043F0014 },
      { .reg = 0x00009888, .val = 0x02592000 },
      { .reg = 0x00009888, .val = 0x005A8000 },
      { .reg = 0x00009888, .val = 0x025AC000 },
      { .reg = 0x00009888, .val = 0x045A4000 },
      { .reg = 0x00009888, .val = 0x0A1C8000 },
      { .reg = 0x00009888, .val = 0x001D8000 },
      { .reg = 0x00009888, .val = 0x021DC000 },
      { .reg = 0x00009888, .val = 0x041D4000 },
      { .reg = 0x00009888, .val = 0x0A1E8000 },
      { .reg = 0x00009888, .val = 0x0C1FA000 },
      { .reg = 0x00009888, .val = 0x0E1F001A },
      { .reg = 0x00009888, .val = 0x00394000 },
      { .reg = 0x00009888, .val = 0x02395000 },
      { .reg = 0x00009888, .val = 0x04391000 },
      { .reg = 0x00009888, .val = 0x069A0034 },
      { .reg = 0x00009888, .val = 0x109A0000 },
      { .reg = 0x00009888, .val = 0x06BB4000 },
      { .reg = 0x00009888, .val = 0x0ABE0040 },
      { .reg = 0x00009888, .val = 0x0CBF0984 },
      { .reg = 0x00009888, .val = 0x0EBF0A02 },
      { .reg = 0x00009888, .val = 0x02D94000 },
      { .reg = 0x00009888, .val = 0x0CDAC000 },
      { .reg = 0x00009888, .val = 0x0EDAC000 },
      { .reg = 0x00009888, .val = 0x0C9C0400 },
      { .reg = 0x00009888, .val = 0x0C9DC000 },
      { .reg = 0x00009888, .val = 0x0E9DC000 },
      { .reg = 0x00009888, .val = 0x0C9E0400 },
      { .reg = 0x00009888, .val = 0x109F02A8 },
      { .reg = 0x00009888, .val = 0x0E9F0040 },
      { .reg = 0x00009888, .val = 0x0CB95000 },
      { .reg = 0x00009888, .val = 0x0EB95000 },
      { .reg = 0x00009888, .val = 0x0F88000F },
      { .reg = 0x00009888, .val = 0x0D880400 },
      { .reg = 0x00009888, .val = 0x038A8000 },
      { .reg = 0x00009888, .val = 0x058A8000 },
      { .reg = 0x00009888, .val = 0x078A8000 },
      { .reg = 0x00009888, .val = 0x098A8000 },
      { .reg = 0x00009888, .val = 0x0B8A8000 },
      { .reg = 0x00009888, .val = 0x258B8009 },
      { .reg = 0x00009888, .val = 0x278B002A },
      { .reg = 0x00009888, .val = 0x238B2A80 },
      { .reg = 0x00009888, .val = 0x198C4000 },
      { .reg = 0x00009888, .val = 0x1B8C0015 },
      { .reg = 0x00009888, .val = 0x0D8C4000 },
      { .reg = 0x00009888, .val = 0x0D8DA000 },
      { .reg = 0x00009888, .val = 0x0F8DA000 },
      { .reg = 0x00009888, .val = 0x078D2000 },
      { .reg = 0x00009888, .val = 0x2185800A },
      { .reg = 0x00009888, .val = 0x2385002A },
      { .reg = 0x00009888, .val = 0x1F85AA00 },
      { .reg = 0x00009888, .val = 0x1B830154 },
      { .reg = 0x00009888, .val = 0x03834000 },
      { .reg = 0x00009888, .val = 0x05834000 },
      { .reg = 0x00009888, .val = 0x07834000 },
      { .reg = 0x00009888, .val = 0x09834000 },
      { .reg = 0x00009888, .val = 0x0B834000 },
      { .reg = 0x00009888, .val = 0x0D834000 },
      { .reg = 0x00009888, .val = 0x0D84C000 },
      { .reg = 0x00009888, .val = 0x0F84C000 },
      { .reg = 0x00009888, .val = 0x01848000 },
      { .reg = 0x00009888, .val = 0x0384C000 },
      { .reg = 0x00009888, .val = 0x0584C000 },
      { .reg = 0x00009888, .val = 0x07844000 },
      { .reg = 0x00009888, .val = 0x1D80C000 },
      { .reg = 0x00009888, .val = 0x1F80C000 },
      { .reg = 0x00009888, .val = 0x11808000 },
      { .reg = 0x00009888, .val = 0x1380C000 },
      { .reg = 0x00009888, .val = 0x1580C000 },
      { .reg = 0x00009888, .val = 0x17804000 },
      { .reg = 0x00009888, .val = 0x53800000 },
      { .reg = 0x00009888, .val = 0x45800800 },
      { .reg = 0x00009888, .val = 0x47800842 },
      { .reg = 0x00009888, .val = 0x21800000 },
      { .reg = 0x00009888, .val = 0x31800000 },
      { .reg = 0x00009888, .val = 0x4D800000 },
      { .reg = 0x00009888, .val = 0x3F801084 },
      { .reg = 0x00009888, .val = 0x4F800000 },
      { .reg = 0x00009888, .val = 0x41800044 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x000091BC, .val = 0xE0500000 },
   };

   static const struct intel_perf_query_register_prog l3_4_b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002770, .val = 0x00100070 },
      { .reg = 0x00002774, .val = 0x0000FFF1 },
      { .reg = 0x00002778, .val = 0x00014002 },
      { .reg = 0x0000277C, .val = 0x0000C3FF },
      { .reg = 0x00002780, .val = 0x00010002 },
      { .reg = 0x00002784, .val = 0x0000C7FF },
      { .reg = 0x00002788, .val = 0x00004002 },
      { .reg = 0x0000278C, .val = 0x0000D3FF },
      { .reg = 0x00002790, .val = 0x00100700 },
      { .reg = 0x00002794, .val = 0x0000FF1F },
      { .reg = 0x00002798, .val = 0x00001402 },
      { .reg = 0x0000279C, .val = 0x0000FC3F },
      { .reg = 0x000027A0, .val = 0x00001002 },
      { .reg = 0x000027A4, .val = 0x0000FC7F },
      { .reg = 0x000027A8, .val = 0x00000402 },
      { .reg = 0x000027AC, .val = 0x0000FD3F },
   };

   static const struct intel_perf_query_register_prog l3_4_flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);
   struct intel_perf_query_counter *counter;

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_4";
   query->symbol_name = "L3_4";
   query->guid = "d7793b26-b5e3-4f0f-ad78-7ebc9d0b4c7d";

   /* OA report configuration. */
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Room for every counter this set can expose (35 fixed + 5 optional). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;

   /* Layout of the accumulation buffer: each section starts where the
    * previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   counter = query->counters;

   /* The description below is only filled in while data_size is still
    * zero; data_size itself is set once the last counter is in place. */
   if (!query->data_size) {
      query->config.mux_regs = l3_4_mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(l3_4_mux_regs);

      query->config.b_counter_regs = l3_4_b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(l3_4_b_counter_regs);

      query->config.flex_regs = l3_4_flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(l3_4_flex_regs);

      /* --- GPU ------------------------------------------------------- */
      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__gpu_time__read, 0,
         "GpuTime", "GPU Time Elapsed", "GPU",
         "Time elapsed on the GPU during the measurement. Unit: ns.",
         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_NS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__gpu_core_clocks__read, 8,
         "GpuCoreClocks", "GPU Core Clocks", "GPU",
         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_CYCLES,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__avg_gpu_core_frequency__read, 16,
         "AvgGpuCoreFrequency", "AVG GPU Core Frequency", "GPU",
         "Average GPU Core Frequency in the measurement. Unit: Hz.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_HZ,
         bdw__l3_4__avg_gpu_core_frequency__max(perf));

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__gpu_busy__read, 24,
         "GpuBusy", "GPU Busy", "GPU",
         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.");

      /* --- Thread dispatch ------------------------------------------- */
      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__vs_threads__read, 32,
         "VsThreads", "VS Threads Dispatched", "EU Array/Vertex Shader",
         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__hs_threads__read, 40,
         "HsThreads", "HS Threads Dispatched", "EU Array/Hull Shader",
         "The total number of hull shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__ds_threads__read, 48,
         "DsThreads", "DS Threads Dispatched", "EU Array/Domain Shader",
         "The total number of domain shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__gs_threads__read, 56,
         "GsThreads", "GS Threads Dispatched", "EU Array/Geometry Shader",
         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__ps_threads__read, 64,
         "PsThreads", "FS Threads Dispatched", "EU Array/Fragment Shader",
         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__cs_threads__read, 72,
         "CsThreads", "CS Threads Dispatched", "EU Array/Compute Shader",
         "The total number of compute shader hardware threads dispatched. Unit: threads.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */);

      /* --- EU array utilisation -------------------------------------- */
      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__eu_active__read, 80,
         "EuActive", "EU Active", "EU Array",
         "The percentage of time in which the Execution Units were actively processing. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__eu_stall__read, 84,
         "EuStall", "EU Stall", "EU Array",
         "The percentage of time in which the Execution Units were stalled. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__eu_fpu_both_active__read, 88,
         "EuFpuBothActive", "EU Both FPU Pipes Active", "EU Array/Pipes",
         "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__vs_fpu0_active__read, 92,
         "VsFpu0Active", "VS FPU0 Pipe Active", "EU Array/Vertex Shader",
         "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__vs_fpu1_active__read, 96,
         "VsFpu1Active", "VS FPU1 Pipe Active", "EU Array/Vertex Shader",
         "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__vs_send_active__read, 100,
         "VsSendActive", "VS Send Pipe Active", "EU Array/Vertex Shader",
         "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__ps_fpu0_active__read, 104,
         "PsFpu0Active", "PS FPU0 Pipe Active", "EU Array/Pixel Shader",
         "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__ps_fpu1_active__read, 108,
         "PsFpu1Active", "PS FPU1 Pipe Active", "EU Array/Pixel Shader",
         "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__ps_send_active__read, 112,
         "PsSendActive", "PS Send Pipeline Active", "EU Array/Pixel Shader",
         "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.");

      BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__ps_eu_both_fpu_active__read, 116,
         "PsEuBothFpuActive", "FS Both FPU Active", "3D Pipe/Fragment Shader",
         "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.");

      /* --- 3D pipe pixel statistics ---------------------------------- */
      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__rasterized_pixels__read, 120,
         "RasterizedPixels", "Rasterized Pixels", "3D Pipe/Rasterizer",
         "The total number of rasterized pixels. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__hi_depth_test_fails__read, 128,
         "HiDepthTestFails", "Early Hi-Depth Test Fails",
         "3D Pipe/Rasterizer/Hi-Depth Test",
         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__early_depth_test_fails__read, 136,
         "EarlyDepthTestFails", "Early Depth Test Fails",
         "3D Pipe/Rasterizer/Early Depth Test",
         "The total number of pixels dropped on early depth test. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__samples_killed_in_ps__read, 144,
         "SamplesKilledInPs", "Samples Killed in FS", "3D Pipe/Fragment Shader",
         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__pixels_failing_post_ps_tests__read, 152,
         "PixelsFailingPostPsTests", "Pixels Failing Tests",
         "3D Pipe/Output Merger",
         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__samples_written__read, 160,
         "SamplesWritten", "Samples Written", "3D Pipe/Output Merger",
         "The total number of samples or pixels written to all render targets. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__samples_blended__read, 168,
         "SamplesBlended", "Samples Blended", "3D Pipe/Output Merger",
         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */);

      /* --- Sampler --------------------------------------------------- */
      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__sampler_texels__read, 176,
         "SamplerTexels", "Sampler Texels", "Sampler/Sampler Input",
         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__sampler_texel_misses__read, 184,
         "SamplerTexelMisses", "Sampler Texels Misses", "Sampler/Sampler Cache",
         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */);

      /* --- L3 / data port -------------------------------------------- */
      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__slm_bytes_read__read, 192,
         "SlmBytesRead", "SLM Bytes Read", "L3/Data Port/SLM",
         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__slm_bytes_written__read, 200,
         "SlmBytesWritten", "SLM Bytes Written", "L3/Data Port/SLM",
         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__shader_memory_accesses__read, 208,
         "ShaderMemoryAccesses", "Shader Memory Accesses", "L3/Data Port",
         "The total number of shader memory accesses to L3. Unit: messages.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__shader_atomics__read, 216,
         "ShaderAtomics", "Shader Atomic Memory Accesses",
         "L3/Data Port/Atomics",
         "The total number of shader atomic memory accesses. Unit: messages.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__l3_shader_throughput__read, 224,
         "L3ShaderThroughput", "L3 Shader Throughput", "L3/Data Port",
         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */);

      BDW_L3_4_UINT64_COUNTER(bdw__l3_4__shader_barriers__read, 232,
         "ShaderBarriers", "Shader Barrier Messages", "EU Array/Barrier",
         "The total number of shader barrier messages. Unit: messages.",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */);

      /* --- Optional counters ----------------------------------------- */
      /* Only appended when the matching slice_mask bit / query_mode flag
       * is set; registration order is part of the counter list layout. */
      if (perf->sys_vars.slice_mask & 0x1) {
         BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__l30_bank2_stalled__read, 240,
            "L30Bank2Stalled", "Slice0 L3 Bank2 Stalled", "GTI/L3",
            "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.");
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__l31_bank2_stalled__read, 244,
            "L31Bank2Stalled", "Slice1 L3 Bank2 Stalled", "GTI/L3",
            "The percentage of time in which slice1 L3 bank2 is stalled Unit: percent.");

         BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__l31_bank2_active__read, 248,
            "L31Bank2Active", "Slice1 L3 Bank2 Active", "GTI/L3",
            "The percentage of time in which slice1 L3 bank2 is active Unit: percent.");
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__l30_bank2_active__read, 252,
            "L30Bank2Active", "Slice0 L3 Bank2 Active", "GTI/L3",
            "The percentage of time in which slice0 L3 bank2 is active Unit: percent.");
      }

      if (perf->sys_vars.query_mode) {
         BDW_L3_4_PERCENT_COUNTER(bdw__l3_4__gt_request_queue_full__read, 256,
            "GTRequestQueueFull", "SQ is full", "GTI",
            "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.");
      }

      /* `counter` now refers to the last counter actually registered, so
       * its end marks the total size of the result data. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef BDW_L3_4_PERCENT_COUNTER
#undef BDW_L3_4_UINT64_COUNTER
#undef BDW_L3_4_COUNTER


static void
bdw_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "da0f7875-1143-4d73-a39c-9128a951c46a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 46);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x143B000E },
         { .reg = 0x00009888, .val = 0x043C55C0 },
         { .reg = 0x00009888, .val = 0x0A1E0280 },
         { .reg = 0x00009888, .val = 0x0C1E0408 },
         { .reg = 0x00009888, .val = 0x10390000 },
         { .reg = 0x00009888, .val = 0x12397A1F },
         { .reg = 0x00009888, .val = 0x14BB000E },
         { .reg = 0x00009888, .val = 0x04BC5000 },
         { .reg = 0x00009888, .val = 0x0A9E0296 },
         { .reg = 0x00009888, .val = 0x0C9E0008 },
         { .reg = 0x00009888, .val = 0x10B90000 },
         { .reg = 0x00009888, .val = 0x12B97A1F },
         { .reg = 0x00009888, .val = 0x063B0042 },
         { .reg = 0x00009888, .val = 0x103B0000 },
         { .reg = 0x00009888, .val = 0x083C0000 },
         { .reg = 0x00009888, .val = 0x0A3E0040 },
         { .reg = 0x00009888, .val = 0x043F8000 },
         { .reg = 0x00009888, .val = 0x02594000 },
         { .reg = 0x00009888, .val = 0x045A8000 },
         { .reg = 0x00009888, .val = 0x0C1C0400 },
         { .reg = 0x00009888, .val = 0x041D8000 },
         { .reg = 0x00009888, .val = 0x081E02C0 },
         { .reg = 0x00009888, .val = 0x0E1E0000 },
         { .reg = 0x00009888, .val = 0x0C1FA800 },
         { .reg = 0x00009888, .val = 0x0E1F0260 },
         { .reg = 0x00009888, .val = 0x101F0014 },
         { .reg = 0x00009888, .val = 0x003905E0 },
         { .reg = 0x00009888, .val = 0x06390BC0 },
         { .reg = 0x00009888, .val = 0x02390018 },
         { .reg = 0x00009888, .val = 0x04394000 },
         { .reg = 0x00009888, .val = 0x04BB0042 },
         { .reg = 0x00009888, .val = 0x10BB0000 },
         { .reg = 0x00009888, .val = 0x02BC05C0 },
         { .reg = 0x00009888, .val = 0x08BC0000 },
         { .reg = 0x00009888, .val = 0x0ABE0004 },
         { .reg = 0x00009888, .val = 0x02BF8000 },
         { .reg = 0x00009888, .val = 0x02D91000 },
         { .reg = 0x00009888, .val = 0x02DA8000 },
         { .reg = 0x00009888, .val = 0x089C8000 },
         { .reg = 0x00009888, .val = 0x029D8000 },
         { .reg = 0x00009888, .val = 0x089E8000 },
         { .reg = 0x00009888, .val = 0x0E9E0000 },
         { .reg = 0x00009888, .val = 0x0E9FA806 },
         { .reg = 0x00009888, .val = 0x109F0142 },
         { .reg = 0x00009888, .val = 0x08B90617 },
         { .reg = 0x00009888, .val = 0x0AB90BE0 },
         { .reg = 0x00009888, .val = 0x02B94000 },
         { .reg = 0x00009888, .val = 0x0D88F000 },
         { .reg = 0x00009888, .val = 0x0F88000C },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x018A8000 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x1B8A2800 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x238B52A0 },
         { .reg = 0x00009888, .val = 0x258B6A95 },
         { .reg = 0x00009888, .val = 0x278B0029 },
         { .reg = 0x00009888, .val = 0x178C2000 },
         { .reg = 0x00009888, .val = 0x198C1500 },
         { .reg = 0x00009888, .val = 0x1B8C0014 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x098DA000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x038D8000 },
         { .reg = 0x00009888, .val = 0x058D2000 },
         { .reg = 0x00009888, .val = 0x1F85AA80 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x01834000 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0184C000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1180C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4D800444 },
         { .reg = 0x00009888, .val = 0x3D800000 },
         { .reg = 0x00009888, .val = 0x4F804000 },
         { .reg = 0x00009888, .val = 0x43801080 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800084 },
         { .reg = 0x00009888, .val = 0x53800044 },
         { .reg = 0x00009888, .val = 0x47801080 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x41800840 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00006000 },
         { .reg = 0x00002774, .val = 0x0000F3FF },
         { .reg = 0x00002778, .val = 0x00001800 },
         { .reg = 0x0000277C, .val = 0x0000FCFF },
         { .reg = 0x00002780, .val = 0x00000600 },
         { .reg = 0x00002784, .val = 0x0000FF3F },
         { .reg = 0x00002788, .val = 0x00000180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000060 },
         { .reg = 0x00002794, .val = 0x0000FFF3 },
         { .reg = 0x00002798, .val = 0x00000018 },
         { .reg = 0x0000279C, .val = 0x0000FFFC },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__pixel_data1_ready__read;
         counter->name = "Slice1 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData1Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__rasterizer1_input_available__read;
         counter->name = "Slice1 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice1 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer1InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__rasterizer1_output_ready__read;
         counter->name = "Slice1 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice1 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer1OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__pixel_values1_ready__read;
         counter->name = "Slice1 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice1 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues1Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__ps_output1_available__read;
         counter->name = "Slice1 PS Output Available";
         counter->desc = "The percentage of time in which slice1 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput1Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "52c186e4-39e3-4534-87cd-41bd47763df9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x18921400 },
         { .reg = 0x00009888, .val = 0x149500AB },
         { .reg = 0x00009888, .val = 0x18B21400 },
         { .reg = 0x00009888, .val = 0x14B500AB },
         { .reg = 0x00009888, .val = 0x18D21400 },
         { .reg = 0x00009888, .val = 0x14D500AB },
         { .reg = 0x00009888, .val = 0x0CDC8000 },
         { .reg = 0x00009888, .val = 0x0EDC4000 },
         { .reg = 0x00009888, .val = 0x02DCC000 },
         { .reg = 0x00009888, .val = 0x04DCC000 },
         { .reg = 0x00009888, .val = 0x1ABD00A0 },
         { .reg = 0x00009888, .val = 0x0ABD8000 },
         { .reg = 0x00009888, .val = 0x0CD88000 },
         { .reg = 0x00009888, .val = 0x0ED84000 },
         { .reg = 0x00009888, .val = 0x04D88000 },
         { .reg = 0x00009888, .val = 0x1ADB0050 },
         { .reg = 0x00009888, .val = 0x04DB8000 },
         { .reg = 0x00009888, .val = 0x06DB8000 },
         { .reg = 0x00009888, .val = 0x08DB8000 },
         { .reg = 0x00009888, .val = 0x0ADB4000 },
         { .reg = 0x00009888, .val = 0x109F02A0 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00AA },
         { .reg = 0x00009888, .val = 0x18B82500 },
         { .reg = 0x00009888, .val = 0x02B88000 },
         { .reg = 0x00009888, .val = 0x04B84000 },
         { .reg = 0x00009888, .val = 0x06B84000 },
         { .reg = 0x00009888, .val = 0x08B84000 },
         { .reg = 0x00009888, .val = 0x0AB84000 },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x0CB98000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x00B98000 },
         { .reg = 0x00009888, .val = 0x02B9A000 },
         { .reg = 0x00009888, .val = 0x04B9A000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x1ABA0200 },
         { .reg = 0x00009888, .val = 0x02BA8000 },
         { .reg = 0x00009888, .val = 0x0CBA8000 },
         { .reg = 0x00009888, .val = 0x04908000 },
         { .reg = 0x00009888, .val = 0x04918000 },
         { .reg = 0x00009888, .val = 0x04927300 },
         { .reg = 0x00009888, .val = 0x10920000 },
         { .reg = 0x00009888, .val = 0x1893000A },
         { .reg = 0x00009888, .val = 0x0A934000 },
         { .reg = 0x00009888, .val = 0x0A946000 },
         { .reg = 0x00009888, .val = 0x0C959000 },
         { .reg = 0x00009888, .val = 0x0E950098 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x04B04000 },
         { .reg = 0x00009888, .val = 0x04B14000 },
         { .reg = 0x00009888, .val = 0x04B20073 },
         { .reg = 0x00009888, .val = 0x10B20000 },
         { .reg = 0x00009888, .val = 0x04B38000 },
         { .reg = 0x00009888, .val = 0x06B38000 },
         { .reg = 0x00009888, .val = 0x08B34000 },
         { .reg = 0x00009888, .val = 0x04B4C000 },
         { .reg = 0x00009888, .val = 0x02B59890 },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x06D04000 },
         { .reg = 0x00009888, .val = 0x06D14000 },
         { .reg = 0x00009888, .val = 0x06D20073 },
         { .reg = 0x00009888, .val = 0x10D20000 },
         { .reg = 0x00009888, .val = 0x18D30020 },
         { .reg = 0x00009888, .val = 0x02D38000 },
         { .reg = 0x00009888, .val = 0x0CD34000 },
         { .reg = 0x00009888, .val = 0x0AD48000 },
         { .reg = 0x00009888, .val = 0x04D42000 },
         { .reg = 0x00009888, .val = 0x0ED59000 },
         { .reg = 0x00009888, .val = 0x00D59800 },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x0F88000E },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x258B000A },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8D8000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x2185000A },
         { .reg = 0x00009888, .val = 0x1B830150 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D848000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D808000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801021 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800C64 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800C02 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler11_input_available__read;
         counter->name = "Slice1 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler11InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler12_input_available__read;
         counter->name = "Slice1 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler12InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler10_input_available__read;
         counter->name = "Slice1 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler10InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler12_output_ready__read;
         counter->name = "Slice1 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler12OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler10_output_ready__read;
         counter->name = "Slice1 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler10OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__sampler11_output_ready__read;
         counter->name = "Slice1 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler11OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler_2";
   query->symbol_name = "Sampler_2";
   query->guid = "edcb8c31-764d-451a-9ecd-c9c89fb54f8d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x18121400 },
         { .reg = 0x00009888, .val = 0x141500AB },
         { .reg = 0x00009888, .val = 0x18321400 },
         { .reg = 0x00009888, .val = 0x143500AB },
         { .reg = 0x00009888, .val = 0x18521400 },
         { .reg = 0x00009888, .val = 0x145500AB },
         { .reg = 0x00009888, .val = 0x0C5C8000 },
         { .reg = 0x00009888, .val = 0x0E5C4000 },
         { .reg = 0x00009888, .val = 0x025CC000 },
         { .reg = 0x00009888, .val = 0x045CC000 },
         { .reg = 0x00009888, .val = 0x1A3D00A0 },
         { .reg = 0x00009888, .val = 0x0A3D8000 },
         { .reg = 0x00009888, .val = 0x0C588000 },
         { .reg = 0x00009888, .val = 0x0E584000 },
         { .reg = 0x00009888, .val = 0x04588000 },
         { .reg = 0x00009888, .val = 0x1A5B0050 },
         { .reg = 0x00009888, .val = 0x045B8000 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B8000 },
         { .reg = 0x00009888, .val = 0x0A5B4000 },
         { .reg = 0x00009888, .val = 0x101F02A0 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x18382500 },
         { .reg = 0x00009888, .val = 0x02388000 },
         { .reg = 0x00009888, .val = 0x04384000 },
         { .reg = 0x00009888, .val = 0x06384000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x0C388000 },
         { .reg = 0x00009888, .val = 0x0C398000 },
         { .reg = 0x00009888, .val = 0x0E39A000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x1A3A0200 },
         { .reg = 0x00009888, .val = 0x023A8000 },
         { .reg = 0x00009888, .val = 0x0C3A8000 },
         { .reg = 0x00009888, .val = 0x04108000 },
         { .reg = 0x00009888, .val = 0x04118000 },
         { .reg = 0x00009888, .val = 0x04127300 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x1813000A },
         { .reg = 0x00009888, .val = 0x0A134000 },
         { .reg = 0x00009888, .val = 0x0A146000 },
         { .reg = 0x00009888, .val = 0x0C159000 },
         { .reg = 0x00009888, .val = 0x0E150098 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04304000 },
         { .reg = 0x00009888, .val = 0x04314000 },
         { .reg = 0x00009888, .val = 0x04320073 },
         { .reg = 0x00009888, .val = 0x10320000 },
         { .reg = 0x00009888, .val = 0x04338000 },
         { .reg = 0x00009888, .val = 0x06338000 },
         { .reg = 0x00009888, .val = 0x08334000 },
         { .reg = 0x00009888, .val = 0x0434C000 },
         { .reg = 0x00009888, .val = 0x02359890 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x06504000 },
         { .reg = 0x00009888, .val = 0x06514000 },
         { .reg = 0x00009888, .val = 0x06520073 },
         { .reg = 0x00009888, .val = 0x10520000 },
         { .reg = 0x00009888, .val = 0x18530020 },
         { .reg = 0x00009888, .val = 0x02538000 },
         { .reg = 0x00009888, .val = 0x0C534000 },
         { .reg = 0x00009888, .val = 0x0A548000 },
         { .reg = 0x00009888, .val = 0x04542000 },
         { .reg = 0x00009888, .val = 0x0E559000 },
         { .reg = 0x00009888, .val = 0x00559800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x1B8AA000 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x258B0005 },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x2185000A },
         { .reg = 0x00009888, .val = 0x1B830150 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D848000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D808000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801021 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800C64 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800C02 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__sampler_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__sampler_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__sampler_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "729fc3f4-ccff-4902-be6b-f1a22cc92c02";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 48);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x16154D60 },
         { .reg = 0x00009888, .val = 0x16352E60 },
         { .reg = 0x00009888, .val = 0x16554D60 },
         { .reg = 0x00009888, .val = 0x16950000 },
         { .reg = 0x00009888, .val = 0x16B50000 },
         { .reg = 0x00009888, .val = 0x16D50000 },
         { .reg = 0x00009888, .val = 0x005C8000 },
         { .reg = 0x00009888, .val = 0x045CC000 },
         { .reg = 0x00009888, .val = 0x065C4000 },
         { .reg = 0x00009888, .val = 0x083D8000 },
         { .reg = 0x00009888, .val = 0x0A3D8000 },
         { .reg = 0x00009888, .val = 0x0458C000 },
         { .reg = 0x00009888, .val = 0x025B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5B4000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x02384000 },
         { .reg = 0x00009888, .val = 0x04388000 },
         { .reg = 0x00009888, .val = 0x06388000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x0C384000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x043A8000 },
         { .reg = 0x00009888, .val = 0x063A8000 },
         { .reg = 0x00009888, .val = 0x08138000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x06143000 },
         { .reg = 0x00009888, .val = 0x0415CFC7 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x02338000 },
         { .reg = 0x00009888, .val = 0x0C338000 },
         { .reg = 0x00009888, .val = 0x04342000 },
         { .reg = 0x00009888, .val = 0x06344000 },
         { .reg = 0x00009888, .val = 0x0035C700 },
         { .reg = 0x00009888, .val = 0x063500CF },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04538000 },
         { .reg = 0x00009888, .val = 0x06538000 },
         { .reg = 0x00009888, .val = 0x0454C000 },
         { .reg = 0x00009888, .val = 0x0255CFC7 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06DC8000 },
         { .reg = 0x00009888, .val = 0x08DC4000 },
         { .reg = 0x00009888, .val = 0x0CDCC000 },
         { .reg = 0x00009888, .val = 0x0EDCC000 },
         { .reg = 0x00009888, .val = 0x1ABD00A8 },
         { .reg = 0x00009888, .val = 0x0CD8C000 },
         { .reg = 0x00009888, .val = 0x0ED84000 },
         { .reg = 0x00009888, .val = 0x0EDB8000 },
         { .reg = 0x00009888, .val = 0x18DB0800 },
         { .reg = 0x00009888, .val = 0x1ADB0254 },
         { .reg = 0x00009888, .val = 0x0E9FAA00 },
         { .reg = 0x00009888, .val = 0x109F02AA },
         { .reg = 0x00009888, .val = 0x0EB84000 },
         { .reg = 0x00009888, .val = 0x16B84000 },
         { .reg = 0x00009888, .val = 0x18B8156A },
         { .reg = 0x00009888, .val = 0x06B98000 },
         { .reg = 0x00009888, .val = 0x08B9A000 },
         { .reg = 0x00009888, .val = 0x0AB9A000 },
         { .reg = 0x00009888, .val = 0x0CB9A000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x18BAA000 },
         { .reg = 0x00009888, .val = 0x1ABA0002 },
         { .reg = 0x00009888, .val = 0x16934000 },
         { .reg = 0x00009888, .val = 0x1893000A },
         { .reg = 0x00009888, .val = 0x0A947000 },
         { .reg = 0x00009888, .val = 0x0C95C5C1 },
         { .reg = 0x00009888, .val = 0x0E9500C3 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x0EB38000 },
         { .reg = 0x00009888, .val = 0x16B30040 },
         { .reg = 0x00009888, .val = 0x18B30020 },
         { .reg = 0x00009888, .val = 0x06B48000 },
         { .reg = 0x00009888, .val = 0x08B41000 },
         { .reg = 0x00009888, .val = 0x0AB48000 },
         { .reg = 0x00009888, .val = 0x06B5C500 },
         { .reg = 0x00009888, .val = 0x08B500C3 },
         { .reg = 0x00009888, .val = 0x0EB5C100 },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x16D31500 },
         { .reg = 0x00009888, .val = 0x08D4E000 },
         { .reg = 0x00009888, .val = 0x08D5C100 },
         { .reg = 0x00009888, .val = 0x0AD5C3C5 },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x0D88F800 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258BAAA5 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x0F8C4000 },
         { .reg = 0x00009888, .val = 0x178C2000 },
         { .reg = 0x00009888, .val = 0x198C5500 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x078D8000 },
         { .reg = 0x00009888, .val = 0x098DA000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800C42 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800063 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800800 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F8014A4 },
         { .reg = 0x00009888, .val = 0x41801042 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x0000FE7F },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFBF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFF7 },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FFF9 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__non_ps_thread11_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread11ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__ps_thread10_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread10ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__non_ps_thread10_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread10ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__ps_thread12_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread12ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__non_ps_thread12_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread12ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__ps_thread11_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread11ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "29598975-4785-43ab-a981-1dfa58d0e835";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 48);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe its register
    * programming in the static const tables below, which only need to be
    * initialized once. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x16150000 },
         { .reg = 0x00009888, .val = 0x16350000 },
         { .reg = 0x00009888, .val = 0x16550000 },
         { .reg = 0x00009888, .val = 0x16952E60 },
         { .reg = 0x00009888, .val = 0x16B54D60 },
         { .reg = 0x00009888, .val = 0x16D52E60 },
         { .reg = 0x00009888, .val = 0x065C8000 },
         { .reg = 0x00009888, .val = 0x085CC000 },
         { .reg = 0x00009888, .val = 0x0A5CC000 },
         { .reg = 0x00009888, .val = 0x0C5C4000 },
         { .reg = 0x00009888, .val = 0x0E3D8000 },
         { .reg = 0x00009888, .val = 0x183DA000 },
         { .reg = 0x00009888, .val = 0x06588000 },
         { .reg = 0x00009888, .val = 0x08588000 },
         { .reg = 0x00009888, .val = 0x0A584000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x185B5800 },
         { .reg = 0x00009888, .val = 0x1A5B000A },
         { .reg = 0x00009888, .val = 0x0E1FAA00 },
         { .reg = 0x00009888, .val = 0x101F02AA },
         { .reg = 0x00009888, .val = 0x0E384000 },
         { .reg = 0x00009888, .val = 0x16384000 },
         { .reg = 0x00009888, .val = 0x18382A55 },
         { .reg = 0x00009888, .val = 0x06398000 },
         { .reg = 0x00009888, .val = 0x0839A000 },
         { .reg = 0x00009888, .val = 0x0A39A000 },
         { .reg = 0x00009888, .val = 0x0C39A000 },
         { .reg = 0x00009888, .val = 0x0E39A000 },
         { .reg = 0x00009888, .val = 0x1A3A02A0 },
         { .reg = 0x00009888, .val = 0x0E138000 },
         { .reg = 0x00009888, .val = 0x16130500 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x08146000 },
         { .reg = 0x00009888, .val = 0x0615C100 },
         { .reg = 0x00009888, .val = 0x0815C500 },
         { .reg = 0x00009888, .val = 0x0A1500C3 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x16335040 },
         { .reg = 0x00009888, .val = 0x08349000 },
         { .reg = 0x00009888, .val = 0x0A341000 },
         { .reg = 0x00009888, .val = 0x083500C1 },
         { .reg = 0x00009888, .val = 0x0A35C500 },
         { .reg = 0x00009888, .val = 0x0C3500C3 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x1853002A },
         { .reg = 0x00009888, .val = 0x0A54E000 },
         { .reg = 0x00009888, .val = 0x0C55C500 },
         { .reg = 0x00009888, .val = 0x0E55C1C3 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x00DC8000 },
         { .reg = 0x00009888, .val = 0x02DCC000 },
         { .reg = 0x00009888, .val = 0x04DC4000 },
         { .reg = 0x00009888, .val = 0x04BD8000 },
         { .reg = 0x00009888, .val = 0x06BD8000 },
         { .reg = 0x00009888, .val = 0x02D8C000 },
         { .reg = 0x00009888, .val = 0x02DB8000 },
         { .reg = 0x00009888, .val = 0x04DB4000 },
         { .reg = 0x00009888, .val = 0x06DB4000 },
         { .reg = 0x00009888, .val = 0x08DB8000 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00AA },
         { .reg = 0x00009888, .val = 0x02B84000 },
         { .reg = 0x00009888, .val = 0x04B84000 },
         { .reg = 0x00009888, .val = 0x06B84000 },
         { .reg = 0x00009888, .val = 0x08B84000 },
         { .reg = 0x00009888, .val = 0x0AB88000 },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x00B98000 },
         { .reg = 0x00009888, .val = 0x02B9A000 },
         { .reg = 0x00009888, .val = 0x04B9A000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x0ABA8000 },
         { .reg = 0x00009888, .val = 0x0CBA8000 },
         { .reg = 0x00009888, .val = 0x04938000 },
         { .reg = 0x00009888, .val = 0x06938000 },
         { .reg = 0x00009888, .val = 0x0494C000 },
         { .reg = 0x00009888, .val = 0x0295CFC7 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x02B38000 },
         { .reg = 0x00009888, .val = 0x08B38000 },
         { .reg = 0x00009888, .val = 0x04B42000 },
         { .reg = 0x00009888, .val = 0x06B41000 },
         { .reg = 0x00009888, .val = 0x00B5C700 },
         { .reg = 0x00009888, .val = 0x04B500CF },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x0AD38000 },
         { .reg = 0x00009888, .val = 0x0CD38000 },
         { .reg = 0x00009888, .val = 0x06D46000 },
         { .reg = 0x00009888, .val = 0x04D5C700 },
         { .reg = 0x00009888, .val = 0x06D500CF },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x198A8000 },
         { .reg = 0x00009888, .val = 0x1B8AAAA0 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x258B555A },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800882 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45801082 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x478014A5 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800002 },
         { .reg = 0x00009888, .val = 0x41800C62 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x0000FE7F },
         { .reg = 0x00002780, .val = 0x00000000 },
         { .reg = 0x00002784, .val = 0x0000FF9F },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000FFE7 },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFFB },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FFFD },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header12_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader12ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header12_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader12ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header11_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader11ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header10_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader10ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header10_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader10ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__thread_header11_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader11ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bdw__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "8fb61ba2-2fbb-454c-a136-2dec5a8a595e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x161503E0 },
         { .reg = 0x00009888, .val = 0x163503E0 },
         { .reg = 0x00009888, .val = 0x165503E0 },
         { .reg = 0x00009888, .val = 0x169503E0 },
         { .reg = 0x00009888, .val = 0x16B503E0 },
         { .reg = 0x00009888, .val = 0x16D503E0 },
         { .reg = 0x00009888, .val = 0x045CC000 },
         { .reg = 0x00009888, .val = 0x083D8000 },
         { .reg = 0x00009888, .val = 0x04584000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5B8000 },
         { .reg = 0x00009888, .val = 0x0E1F00A8 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x0C388000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0C3A8000 },
         { .reg = 0x00009888, .val = 0x08138000 },
         { .reg = 0x00009888, .val = 0x06141000 },
         { .reg = 0x00009888, .val = 0x041500C3 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x0A338000 },
         { .reg = 0x00009888, .val = 0x06342000 },
         { .reg = 0x00009888, .val = 0x0435C300 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x0C538000 },
         { .reg = 0x00009888, .val = 0x06544000 },
         { .reg = 0x00009888, .val = 0x065500C3 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x00DC8000 },
         { .reg = 0x00009888, .val = 0x02DC4000 },
         { .reg = 0x00009888, .val = 0x02BD8000 },
         { .reg = 0x00009888, .val = 0x00D88000 },
         { .reg = 0x00009888, .val = 0x02DB4000 },
         { .reg = 0x00009888, .val = 0x04DB8000 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F0002 },
         { .reg = 0x00009888, .val = 0x02B84000 },
         { .reg = 0x00009888, .val = 0x04B84000 },
         { .reg = 0x00009888, .val = 0x06B88000 },
         { .reg = 0x00009888, .val = 0x00B98000 },
         { .reg = 0x00009888, .val = 0x02B9A000 },
         { .reg = 0x00009888, .val = 0x06BA8000 },
         { .reg = 0x00009888, .val = 0x02938000 },
         { .reg = 0x00009888, .val = 0x04942000 },
         { .reg = 0x00009888, .val = 0x0095C300 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x04B38000 },
         { .reg = 0x00009888, .val = 0x04B44000 },
         { .reg = 0x00009888, .val = 0x02B500C3 },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x06D38000 },
         { .reg = 0x00009888, .val = 0x04D48000 },
         { .reg = 0x00009888, .val = 0x02D5C300 },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x238B3500 },
         { .reg = 0x00009888, .val = 0x258B0005 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x2185000A },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800C40 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41801482 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00001000 },
         { .reg = 0x0000E558, .val = 0x00003002 },
         { .reg = 0x0000E658, .val = 0x00005004 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00050012 },
         { .reg = 0x0000E55C, .val = 0x00052051 },
         { .reg = 0x0000E65C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bdw_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metric set";
   query->symbol_name = "VMEPipe";
   query->guid = "e1743ca0-7fc8-410b-a066-de7bbb9280b7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x14100812 },
         { .reg = 0x00009888, .val = 0x14125800 },
         { .reg = 0x00009888, .val = 0x161200C0 },
         { .reg = 0x00009888, .val = 0x14300812 },
         { .reg = 0x00009888, .val = 0x14325800 },
         { .reg = 0x00009888, .val = 0x163200C0 },
         { .reg = 0x00009888, .val = 0x005C4000 },
         { .reg = 0x00009888, .val = 0x065C8000 },
         { .reg = 0x00009888, .val = 0x085CC000 },
         { .reg = 0x00009888, .val = 0x0A5CC000 },
         { .reg = 0x00009888, .val = 0x0C5CC000 },
         { .reg = 0x00009888, .val = 0x003D8000 },
         { .reg = 0x00009888, .val = 0x0E3D8000 },
         { .reg = 0x00009888, .val = 0x183D2800 },
         { .reg = 0x00009888, .val = 0x00584000 },
         { .reg = 0x00009888, .val = 0x06588000 },
         { .reg = 0x00009888, .val = 0x0858C000 },
         { .reg = 0x00009888, .val = 0x005B4000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x185B9400 },
         { .reg = 0x00009888, .val = 0x1A5B002A },
         { .reg = 0x00009888, .val = 0x0C1F0800 },
         { .reg = 0x00009888, .val = 0x0E1FAA00 },
         { .reg = 0x00009888, .val = 0x101F002A },
         { .reg = 0x00009888, .val = 0x00384000 },
         { .reg = 0x00009888, .val = 0x0E384000 },
         { .reg = 0x00009888, .val = 0x16384000 },
         { .reg = 0x00009888, .val = 0x18380155 },
         { .reg = 0x00009888, .val = 0x00392000 },
         { .reg = 0x00009888, .val = 0x06398000 },
         { .reg = 0x00009888, .val = 0x0839A000 },
         { .reg = 0x00009888, .val = 0x0A39A000 },
         { .reg = 0x00009888, .val = 0x0C39A000 },
         { .reg = 0x00009888, .val = 0x00100047 },
         { .reg = 0x00009888, .val = 0x06101A80 },
         { .reg = 0x00009888, .val = 0x10100000 },
         { .reg = 0x00009888, .val = 0x0810C000 },
         { .reg = 0x00009888, .val = 0x0811C000 },
         { .reg = 0x00009888, .val = 0x08126151 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x00134000 },
         { .reg = 0x00009888, .val = 0x0E134000 },
         { .reg = 0x00009888, .val = 0x161300A0 },
         { .reg = 0x00009888, .val = 0x0A301AC7 },
         { .reg = 0x00009888, .val = 0x10300000 },
         { .reg = 0x00009888, .val = 0x0C30C000 },
         { .reg = 0x00009888, .val = 0x0C31C000 },
         { .reg = 0x00009888, .val = 0x0C326151 },
         { .reg = 0x00009888, .val = 0x10320000 },
         { .reg = 0x00009888, .val = 0x16332A00 },
         { .reg = 0x00009888, .val = 0x18330001 },
         { .reg = 0x00009888, .val = 0x018A8000 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x198A8000 },
         { .reg = 0x00009888, .val = 0x1B8A2AA0 },
         { .reg = 0x00009888, .val = 0x238B0020 },
         { .reg = 0x00009888, .val = 0x258B5550 },
         { .reg = 0x00009888, .val = 0x278B0001 },
         { .reg = 0x00009888, .val = 0x1F850080 },
         { .reg = 0x00009888, .val = 0x2185AAA0 },
         { .reg = 0x00009888, .val = 0x23850002 },
         { .reg = 0x00009888, .val = 0x01834000 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830015 },
         { .reg = 0x00009888, .val = 0x01844000 },
         { .reg = 0x00009888, .val = 0x07848000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x11804000 },
         { .reg = 0x00009888, .val = 0x17808000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3D800800 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800002 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800884 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800002 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bdw__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bdw__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bdw__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the bdw "Gpu Rings Busyness" (GpuBusyness) OA query.
 *
 * A query description is allocated with perf as its ralloc context, filled
 * with the register programming and the nine counters of this metric set,
 * and then inserted into perf->oa_metrics_table keyed by the query GUID.
 */
static void
bdw_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   /* MUX register writes attached to the query config. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x1AF00100 },
      { .reg = 0x00009888, .val = 0x118B0000 },
      { .reg = 0x00009888, .val = 0x11850008 },
      { .reg = 0x00009888, .val = 0x17830100 },
      { .reg = 0x00009888, .val = 0x1A700100 },
      { .reg = 0x00009888, .val = 0x1D940040 },
      { .reg = 0x00009888, .val = 0x02F00085 },
      { .reg = 0x00009888, .val = 0x10F00000 },
      { .reg = 0x00009888, .val = 0x24F00000 },
      { .reg = 0x00009888, .val = 0x078B0003 },
      { .reg = 0x00009888, .val = 0x238B0400 },
      { .reg = 0x00009888, .val = 0x258B0000 },
      { .reg = 0x00009888, .val = 0x058C8000 },
      { .reg = 0x00009888, .val = 0x038D2000 },
      { .reg = 0x00009888, .val = 0x03850500 },
      { .reg = 0x00009888, .val = 0x1F858800 },
      { .reg = 0x00009888, .val = 0x2185000A },
      { .reg = 0x00009888, .val = 0x05834055 },
      { .reg = 0x00009888, .val = 0x11830000 },
      { .reg = 0x00009888, .val = 0x09830000 },
      { .reg = 0x00009888, .val = 0x0B838000 },
      { .reg = 0x00009888, .val = 0x0D834000 },
      { .reg = 0x00009888, .val = 0x03844000 },
      { .reg = 0x00009888, .val = 0x0584C000 },
      { .reg = 0x00009888, .val = 0x07844000 },
      { .reg = 0x00009888, .val = 0x1380C000 },
      { .reg = 0x00009888, .val = 0x1580C000 },
      { .reg = 0x00009888, .val = 0x17804000 },
      { .reg = 0x00009888, .val = 0x04708500 },
      { .reg = 0x00009888, .val = 0x10700000 },
      { .reg = 0x00009888, .val = 0x24700000 },
      { .reg = 0x00009888, .val = 0x21800000 },
      { .reg = 0x00009888, .val = 0x4D801000 },
      { .reg = 0x00009888, .val = 0x3F800000 },
      { .reg = 0x00009888, .val = 0x4F800004 },
      { .reg = 0x00009888, .val = 0x418014A0 },
      { .reg = 0x00009888, .val = 0x31800000 },
      { .reg = 0x00009840, .val = 0x00000080 },
   };

   /* B-counter register writes attached to the query config. */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x10800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x0007C000 },
      { .reg = 0x00002774, .val = 0x000007FF },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->guid = "0a9eb7be-feee-4275-a139-6d9cedf0fdb0";
   info->symbol_name = "GpuBusyness";
   info->name = "Gpu Rings Busyness";

   /* Counter storage: room for every counter registered below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 9);
   info->n_counters = 0;

   /* The metrics set id is left at 0 here; it is determined at runtime,
    * via sysfs. */
   info->oa_metrics_set_id = 0;
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Offsets into the accumulation buffer, each group following the
    * previous one. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is filled in only while data_size is still zero. */
   if (!info->data_size) {
      info->config.n_mux_regs = ARRAY_SIZE(mux_prog);
      info->config.mux_regs = mux_prog;

      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);
      info->config.b_counter_regs = b_counter_prog;

      /* GpuTime */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__gpu_busyness__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__gpu_busyness__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: its maximum is computed from perf. */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__gpu_busyness__avg_gpu_core_frequency__read;
      ctr->raw_max = bdw__gpu_busyness__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* RenderBusy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "RenderBusy";
      ctr->name = "Render Ring Busy";
      ctr->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__render_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* Vdbox0Busy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Vdbox0Busy";
      ctr->name = "Vdbox0 Ring Busy";
      ctr->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__vdbox0_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 28;

      /* Vdbox1Busy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Vdbox1Busy";
      ctr->name = "Vdbox1 Ring Busy";
      ctr->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__vdbox1_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 32;

      /* VeboxBusy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "VeboxBusy";
      ctr->name = "Vebox Ring Busy";
      ctr->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__vebox_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 36;

      /* BlitterBusy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "BlitterBusy";
      ctr->name = "Blitter Ring Busy";
      ctr->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__blitter_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 40;

      /* AnyRingBusy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "AnyRingBusy";
      ctr->name = "AnyRingBusy";
      ctr->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__gpu_busyness__any_ring_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 44;

      /* Total size: end of the last counter registered above. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


/* Registers the bdw "MDAPI testing set" (TestOa) OA query.
 *
 * A query description is allocated with perf as its ralloc context, filled
 * with the register programming and the twelve counters of this metric set,
 * and then inserted into perf->oa_metrics_table keyed by the query GUID.
 */
static void
bdw_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* MUX register writes attached to the query config. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x198B0000 },
      { .reg = 0x00009888, .val = 0x078B0066 },
      { .reg = 0x00009888, .val = 0x118B0000 },
      { .reg = 0x00009888, .val = 0x258B0000 },
      { .reg = 0x00009888, .val = 0x21850008 },
      { .reg = 0x00009888, .val = 0x0D834000 },
      { .reg = 0x00009888, .val = 0x07844000 },
      { .reg = 0x00009888, .val = 0x17804000 },
      { .reg = 0x00009888, .val = 0x21800000 },
      { .reg = 0x00009888, .val = 0x4F800000 },
      { .reg = 0x00009888, .val = 0x41800000 },
      { .reg = 0x00009888, .val = 0x31800000 },
      { .reg = 0x00009840, .val = 0x00000080 },
   };

   /* B-counter register writes attached to the query config. */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002770, .val = 0x00000004 },
      { .reg = 0x00002774, .val = 0x00000000 },
      { .reg = 0x00002778, .val = 0x00000003 },
      { .reg = 0x0000277C, .val = 0x00000000 },
      { .reg = 0x00002780, .val = 0x00000007 },
      { .reg = 0x00002784, .val = 0x00000000 },
      { .reg = 0x00002788, .val = 0x00100002 },
      { .reg = 0x0000278C, .val = 0x0000FFF7 },
      { .reg = 0x00002790, .val = 0x00100002 },
      { .reg = 0x00002794, .val = 0x0000FFCF },
      { .reg = 0x00002798, .val = 0x00100082 },
      { .reg = 0x0000279C, .val = 0x0000FFEF },
      { .reg = 0x000027A0, .val = 0x001000C2 },
      { .reg = 0x000027A4, .val = 0x0000FFE7 },
      { .reg = 0x000027A8, .val = 0x00100001 },
      { .reg = 0x000027AC, .val = 0x0000FFE7 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->guid = "d6de6f55-e526-4f79-a6a6-d7315c09044e";
   info->symbol_name = "TestOa";
   info->name = "MDAPI testing set";

   /* Counter storage: room for every counter registered below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 12);
   info->n_counters = 0;

   /* The metrics set id is left at 0 here; it is determined at runtime,
    * via sysfs. */
   info->oa_metrics_set_id = 0;
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Offsets into the accumulation buffer, each group following the
    * previous one. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is filled in only while data_size is still zero. */
   if (!info->data_size) {
      info->config.n_mux_regs = ARRAY_SIZE(mux_prog);
      info->config.mux_regs = mux_prog;

      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);
      info->config.b_counter_regs = b_counter_prog;

      /* GpuTime */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: its maximum is computed from perf. */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__avg_gpu_core_frequency__read;
      ctr->raw_max = bdw__test_oa__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* Counter0 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter0";
      ctr->name = "TestCounter0";
      ctr->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter0__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 24;

      /* Counter1 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter1";
      ctr->name = "TestCounter1";
      ctr->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter1__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      /* Counter2 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter2";
      ctr->name = "TestCounter2";
      ctr->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter2__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      /* Counter3 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter3";
      ctr->name = "TestCounter3";
      ctr->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter3__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      /* Counter4 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter4";
      ctr->name = "TestCounter4";
      ctr->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter4__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      /* Counter5 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter5";
      ctr->name = "TestCounter5";
      ctr->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter5__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      /* Counter6 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter6";
      ctr->name = "TestCounter6";
      ctr->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter6__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* Counter7 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter7";
      ctr->name = "TestCounter7";
      ctr->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter7__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;

      /* Counter8 */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "Counter8";
      ctr->name = "TestCounter8";
      ctr->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__test_oa__counter8__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 88;

      /* Total size: end of the last counter registered above. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


/* Registers the bdw "Metric set PMA Stall" (STC_PmaStall) OA query.
 *
 * A query description is allocated with perf as its ralloc context, filled
 * with the register programming and up to eleven counters, and then inserted
 * into perf->oa_metrics_table keyed by the query GUID.  The final counter
 * (StcPMAStall) is only added when bit 0 of perf->sys_vars.slice_mask is set.
 */
static void
bdw_register_stc__pma_stall_counter_query(struct intel_perf_config *perf)
{
   /* MUX register writes attached to the query config. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x0C1F0001 },
      { .reg = 0x00009888, .val = 0x001F0015 },
      { .reg = 0x00009888, .val = 0x041F4AC0 },
      { .reg = 0x00009888, .val = 0x061F03D8 },
      { .reg = 0x00009888, .val = 0x0E1F0000 },
      { .reg = 0x00009888, .val = 0x018A8000 },
      { .reg = 0x00009888, .val = 0x0F8A8000 },
      { .reg = 0x00009888, .val = 0x198A8000 },
      { .reg = 0x00009888, .val = 0x1B8A00A0 },
      { .reg = 0x00009888, .val = 0x238B0020 },
      { .reg = 0x00009888, .val = 0x258B0550 },
      { .reg = 0x00009888, .val = 0x1F850080 },
      { .reg = 0x00009888, .val = 0x21850AA0 },
      { .reg = 0x00009888, .val = 0x01834000 },
      { .reg = 0x00009888, .val = 0x0F834000 },
      { .reg = 0x00009888, .val = 0x19835400 },
      { .reg = 0x00009888, .val = 0x01844000 },
      { .reg = 0x00009888, .val = 0x07848000 },
      { .reg = 0x00009888, .val = 0x0984C000 },
      { .reg = 0x00009888, .val = 0x0B844000 },
      { .reg = 0x00009888, .val = 0x11804000 },
      { .reg = 0x00009888, .val = 0x17808000 },
      { .reg = 0x00009888, .val = 0x1980C000 },
      { .reg = 0x00009888, .val = 0x1B804000 },
      { .reg = 0x00009888, .val = 0x4D800000 },
      { .reg = 0x00009888, .val = 0x3D800000 },
      { .reg = 0x00009888, .val = 0x4F800000 },
      { .reg = 0x00009888, .val = 0x43800000 },
      { .reg = 0x00009888, .val = 0x51800000 },
      { .reg = 0x00009888, .val = 0x45800000 },
      { .reg = 0x00009888, .val = 0x21800000 },
      { .reg = 0x00009888, .val = 0x31800000 },
      { .reg = 0x00009840, .val = 0x00000080 },
   };

   /* B-counter register writes attached to the query config. */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x10800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x008000E1 },
      { .reg = 0x00002774, .val = 0x001FFFE0 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->guid = "e713f347-953e-4d8c-b02f-6be31df2db2b";
   info->symbol_name = "STC_PmaStall";
   info->name = "Metric set PMA Stall";

   /* Counter storage: room for every counter that may be registered below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 11);
   info->n_counters = 0;

   /* The metrics set id is left at 0 here; it is determined at runtime,
    * via sysfs. */
   info->oa_metrics_set_id = 0;
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Offsets into the accumulation buffer, each group following the
    * previous one. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is filled in only while data_size is still zero. */
   if (!info->data_size) {
      info->config.n_mux_regs = ARRAY_SIZE(mux_prog);
      info->config.mux_regs = mux_prog;

      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);
      info->config.b_counter_regs = b_counter_prog;

      /* GpuTime */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: its maximum is computed from perf. */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__avg_gpu_core_frequency__read;
      ctr->raw_max = bdw__stc__pma_stall__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* GpuBusy */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = bdw__stc__pma_stall__gpu_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* VsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__vs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      /* HsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__hs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      /* DsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__ds_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      /* GsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__gs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      /* PsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__ps_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      /* CsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = bdw__stc__pma_stall__cs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* StcPMAStall: registered only when bit 0 of the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = info->counters + info->n_counters++;
         ctr->symbol_name = "StcPMAStall";
         ctr->name = "STC PMA stall";
         ctr->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         ctr->category = "GPU/Stencil Cache";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->oa_counter_read_float = bdw__stc__pma_stall__stc_pma_stall__read;
         ctr->raw_max = 100.0;
         ctr->offset = 80;
      }

      /* Total size: end of whichever counter was registered last, so the
       * result depends on whether the slice-gated counter was added. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

void
intel_oa_register_queries_bdw(struct intel_perf_config *perf)
{
   bdw_register_render_basic_counter_query(perf);
   bdw_register_compute_basic_counter_query(perf);
   bdw_register_render_pipe_profile_counter_query(perf);
   bdw_register_memory_reads_counter_query(perf);
   bdw_register_memory_writes_counter_query(perf);
   bdw_register_compute_extended_counter_query(perf);
   bdw_register_compute_l3_cache_counter_query(perf);
   bdw_register_data_port_reads_coalescing_counter_query(perf);
   bdw_register_data_port_writes_coalescing_counter_query(perf);
   bdw_register_hdc_and_sf_counter_query(perf);
   bdw_register_l3_1_counter_query(perf);
   bdw_register_l3_2_counter_query(perf);
   bdw_register_l3_3_counter_query(perf);
   bdw_register_l3_4_counter_query(perf);
   bdw_register_rasterizer_and_pixel_backend_counter_query(perf);
   bdw_register_sampler_1_counter_query(perf);
   bdw_register_sampler_2_counter_query(perf);
   bdw_register_tdl_1_counter_query(perf);
   bdw_register_tdl_2_counter_query(perf);
   bdw_register_compute_extra_counter_query(perf);
   bdw_register_vme_pipe_counter_query(perf);
   bdw_register_gpu_busyness_counter_query(perf);
   bdw_register_test_oa_counter_query(perf);
   bdw_register_stc__pma_stall_counter_query(perf);
}


static void
chv_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "9d8a3af5-c02c-4a4a-b947-f1672469e0fb";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 50);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009888, .val = 0x59800001 },
         { .reg = 0x00009888, .val = 0x285A0006 },
         { .reg = 0x00009888, .val = 0x2C110014 },
         { .reg = 0x00009888, .val = 0x2E110000 },
         { .reg = 0x00009888, .val = 0x2C310014 },
         { .reg = 0x00009888, .val = 0x2E310000 },
         { .reg = 0x00009888, .val = 0x2B8303DF },
         { .reg = 0x00009888, .val = 0x3580024F },
         { .reg = 0x00009888, .val = 0x00580888 },
         { .reg = 0x00009888, .val = 0x1E5A0015 },
         { .reg = 0x00009888, .val = 0x205A0014 },
         { .reg = 0x00009888, .val = 0x045A0000 },
         { .reg = 0x00009888, .val = 0x025A0000 },
         { .reg = 0x00009888, .val = 0x02180500 },
         { .reg = 0x00009888, .val = 0x00190555 },
         { .reg = 0x00009888, .val = 0x021D0500 },
         { .reg = 0x00009888, .val = 0x021F0A00 },
         { .reg = 0x00009888, .val = 0x00380444 },
         { .reg = 0x00009888, .val = 0x02390500 },
         { .reg = 0x00009888, .val = 0x003A0666 },
         { .reg = 0x00009888, .val = 0x00100111 },
         { .reg = 0x00009888, .val = 0x06110030 },
         { .reg = 0x00009888, .val = 0x0A110031 },
         { .reg = 0x00009888, .val = 0x0E110046 },
         { .reg = 0x00009888, .val = 0x04110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x00130111 },
         { .reg = 0x00009888, .val = 0x00300444 },
         { .reg = 0x00009888, .val = 0x08310030 },
         { .reg = 0x00009888, .val = 0x0C310031 },
         { .reg = 0x00009888, .val = 0x10310046 },
         { .reg = 0x00009888, .val = 0x04310000 },
         { .reg = 0x00009888, .val = 0x00310000 },
         { .reg = 0x00009888, .val = 0x00330444 },
         { .reg = 0x00009888, .val = 0x038A0A00 },
         { .reg = 0x00009888, .val = 0x018B0FFF },
         { .reg = 0x00009888, .val = 0x038B0A00 },
         { .reg = 0x00009888, .val = 0x01855000 },
         { .reg = 0x00009888, .val = 0x03850055 },
         { .reg = 0x00009888, .val = 0x13830021 },
         { .reg = 0x00009888, .val = 0x15830020 },
         { .reg = 0x00009888, .val = 0x1783002F },
         { .reg = 0x00009888, .val = 0x1983002E },
         { .reg = 0x00009888, .val = 0x1B83002D },
         { .reg = 0x00009888, .val = 0x1D83002C },
         { .reg = 0x00009888, .val = 0x05830000 },
         { .reg = 0x00009888, .val = 0x01840555 },
         { .reg = 0x00009888, .val = 0x03840500 },
         { .reg = 0x00009888, .val = 0x23800074 },
         { .reg = 0x00009888, .val = 0x2580007D },
         { .reg = 0x00009888, .val = 0x05800000 },
         { .reg = 0x00009888, .val = 0x01805000 },
         { .reg = 0x00009888, .val = 0x03800055 },
         { .reg = 0x00009888, .val = 0x01865000 },
         { .reg = 0x00009888, .val = 0x03860055 },
         { .reg = 0x00009888, .val = 0x01875000 },
         { .reg = 0x00009888, .val = 0x03870055 },
         { .reg = 0x00009888, .val = 0x418000AA },
         { .reg = 0x00009888, .val = 0x4380000A },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x4780000A },
         { .reg = 0x00009888, .val = 0x49800000 },
         { .reg = 0x00009888, .val = 0x4B800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x55800000 },
         { .reg = 0x00009888, .val = 0x57800000 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__sampler0_busy__read;
      counter->name = "Sampler 0 Busy";
      counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
      counter->symbol_name = "Sampler0Busy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__sampler1_busy__read;
      counter->name = "Sampler 1 Busy";
      counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
      counter->symbol_name = "Sampler1Busy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__sampler0_bottleneck__read;
      counter->name = "Sampler 0 Bottleneck";
      counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "Sampler0Bottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__sampler1_bottleneck__read;
      counter->name = "Sampler 1 Bottleneck";
      counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "Sampler1Bottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 328;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "f522a89c-ecd1-4522-8331-3383c54af5f5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009888, .val = 0x59800001 },
         { .reg = 0x00009888, .val = 0x2E5800E0 },
         { .reg = 0x00009888, .val = 0x2E3800E0 },
         { .reg = 0x00009888, .val = 0x3580024F },
         { .reg = 0x00009888, .val = 0x3D800140 },
         { .reg = 0x00009888, .val = 0x08580042 },
         { .reg = 0x00009888, .val = 0x0C580040 },
         { .reg = 0x00009888, .val = 0x1058004C },
         { .reg = 0x00009888, .val = 0x1458004B },
         { .reg = 0x00009888, .val = 0x04580000 },
         { .reg = 0x00009888, .val = 0x00580000 },
         { .reg = 0x00009888, .val = 0x00195555 },
         { .reg = 0x00009888, .val = 0x06380042 },
         { .reg = 0x00009888, .val = 0x0A380040 },
         { .reg = 0x00009888, .val = 0x0E38004C },
         { .reg = 0x00009888, .val = 0x1238004B },
         { .reg = 0x00009888, .val = 0x04380000 },
         { .reg = 0x00009888, .val = 0x00384444 },
         { .reg = 0x00009888, .val = 0x003A5555 },
         { .reg = 0x00009888, .val = 0x018BFFFF },
         { .reg = 0x00009888, .val = 0x01845555 },
         { .reg = 0x00009888, .val = 0x17800074 },
         { .reg = 0x00009888, .val = 0x1980007D },
         { .reg = 0x00009888, .val = 0x1B80007C },
         { .reg = 0x00009888, .val = 0x1D8000B6 },
         { .reg = 0x00009888, .val = 0x1F8000B7 },
         { .reg = 0x00009888, .val = 0x05800000 },
         { .reg = 0x00009888, .val = 0x03800000 },
         { .reg = 0x00009888, .val = 0x418000AA },
         { .reg = 0x00009888, .val = 0x438000AA },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x47800000 },
         { .reg = 0x00009888, .val = 0x4980012A },
         { .reg = 0x00009888, .val = 0x4B80012A },
         { .reg = 0x00009888, .val = 0x4D80012A },
         { .reg = 0x00009888, .val = 0x4F80012A },
         { .reg = 0x00009888, .val = 0x518001CE },
         { .reg = 0x00009888, .val = 0x538001CE },
         { .reg = 0x00009888, .val = 0x5580000E },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__compute_basic__gti_ring_throughput__read;
      counter->name = "GTI Ring Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and Uncore ring. Unit: bytes.";
      counter->symbol_name = "GtiRingThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__gti_ro_stall__read;
      counter->name = "GTI Read-Only Stall";
      counter->desc = "The percentage of time in which GTI Read-Only port has been stalled. Unit: percent.";
      counter->symbol_name = "GtiRoStall";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__compute_basic__gti_rw_stall__read;
      counter->name = "GTI Read-Write Stall";
      counter->desc = "The percentage of time in which GTI Read-Write port has been stalled. Unit: percent.";
      counter->symbol_name = "GtiRwStall";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 292;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "a9ccc03d-a943-4e6b-9cd6-13e063075927";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009888, .val = 0x59800001 },
         { .reg = 0x00009888, .val = 0x261E0000 },
         { .reg = 0x00009888, .val = 0x281F000F },
         { .reg = 0x00009888, .val = 0x2817001A },
         { .reg = 0x00009888, .val = 0x2791001F },
         { .reg = 0x00009888, .val = 0x27880019 },
         { .reg = 0x00009888, .val = 0x2D890000 },
         { .reg = 0x00009888, .val = 0x278A0007 },
         { .reg = 0x00009888, .val = 0x298D001F },
         { .reg = 0x00009888, .val = 0x278E0020 },
         { .reg = 0x00009888, .val = 0x2B8F0012 },
         { .reg = 0x00009888, .val = 0x29900000 },
         { .reg = 0x00009888, .val = 0x00184000 },
         { .reg = 0x00009888, .val = 0x02181000 },
         { .reg = 0x00009888, .val = 0x02194000 },
         { .reg = 0x00009888, .val = 0x141E0002 },
         { .reg = 0x00009888, .val = 0x041E0000 },
         { .reg = 0x00009888, .val = 0x001E0000 },
         { .reg = 0x00009888, .val = 0x221F0015 },
         { .reg = 0x00009888, .val = 0x041F0000 },
         { .reg = 0x00009888, .val = 0x001F4000 },
         { .reg = 0x00009888, .val = 0x021F0000 },
         { .reg = 0x00009888, .val = 0x023A8000 },
         { .reg = 0x00009888, .val = 0x0213C000 },
         { .reg = 0x00009888, .val = 0x02164000 },
         { .reg = 0x00009888, .val = 0x24170012 },
         { .reg = 0x00009888, .val = 0x04170000 },
         { .reg = 0x00009888, .val = 0x07910005 },
         { .reg = 0x00009888, .val = 0x05910000 },
         { .reg = 0x00009888, .val = 0x01911500 },
         { .reg = 0x00009888, .val = 0x03910501 },
         { .reg = 0x00009888, .val = 0x0D880002 },
         { .reg = 0x00009888, .val = 0x1D880003 },
         { .reg = 0x00009888, .val = 0x05880000 },
         { .reg = 0x00009888, .val = 0x0B890032 },
         { .reg = 0x00009888, .val = 0x1B890031 },
         { .reg = 0x00009888, .val = 0x05890000 },
         { .reg = 0x00009888, .val = 0x01890040 },
         { .reg = 0x00009888, .val = 0x03890040 },
         { .reg = 0x00009888, .val = 0x098A0000 },
         { .reg = 0x00009888, .val = 0x198A0004 },
         { .reg = 0x00009888, .val = 0x058A0000 },
         { .reg = 0x00009888, .val = 0x018A8050 },
         { .reg = 0x00009888, .val = 0x038A2050 },
         { .reg = 0x00009888, .val = 0x018B95A9 },
         { .reg = 0x00009888, .val = 0x038BE5A9 },
         { .reg = 0x00009888, .val = 0x018C1500 },
         { .reg = 0x00009888, .val = 0x038C0501 },
         { .reg = 0x00009888, .val = 0x178D0015 },
         { .reg = 0x00009888, .val = 0x058D0000 },
         { .reg = 0x00009888, .val = 0x138E0004 },
         { .reg = 0x00009888, .val = 0x218E000C },
         { .reg = 0x00009888, .val = 0x058E0000 },
         { .reg = 0x00009888, .val = 0x018E0500 },
         { .reg = 0x00009888, .val = 0x038E0101 },
         { .reg = 0x00009888, .val = 0x0F8F0027 },
         { .reg = 0x00009888, .val = 0x058F0000 },
         { .reg = 0x00009888, .val = 0x018F0000 },
         { .reg = 0x00009888, .val = 0x038F0001 },
         { .reg = 0x00009888, .val = 0x11900013 },
         { .reg = 0x00009888, .val = 0x1F900017 },
         { .reg = 0x00009888, .val = 0x05900000 },
         { .reg = 0x00009888, .val = 0x01900100 },
         { .reg = 0x00009888, .val = 0x03900001 },
         { .reg = 0x00009888, .val = 0x01845555 },
         { .reg = 0x00009888, .val = 0x03845555 },
         { .reg = 0x00009888, .val = 0x418000AA },
         { .reg = 0x00009888, .val = 0x438000AA },
         { .reg = 0x00009888, .val = 0x458000AA },
         { .reg = 0x00009888, .val = 0x478000AA },
         { .reg = 0x00009888, .val = 0x4980018C },
         { .reg = 0x00009888, .val = 0x4B80014B },
         { .reg = 0x00009888, .val = 0x4D800128 },
         { .reg = 0x00009888, .val = 0x4F80012A },
         { .reg = 0x00009888, .val = 0x51800187 },
         { .reg = 0x00009888, .val = 0x5380014B },
         { .reg = 0x00009888, .val = 0x55800149 },
         { .reg = 0x00009888, .val = 0x5780010A },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 276;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "33af8252-7a51-4aa3-b0ba-1b946f209d7b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x105C0232 },
         { .reg = 0x00009888, .val = 0x10580232 },
         { .reg = 0x00009888, .val = 0x10380232 },
         { .reg = 0x00009888, .val = 0x10DC0232 },
         { .reg = 0x00009888, .val = 0x10D80232 },
         { .reg = 0x00009888, .val = 0x10B80232 },
         { .reg = 0x00009888, .val = 0x118E4400 },
         { .reg = 0x00009888, .val = 0x025C6080 },
         { .reg = 0x00009888, .val = 0x045C004B },
         { .reg = 0x00009888, .val = 0x005C8000 },
         { .reg = 0x00009888, .val = 0x00582080 },
         { .reg = 0x00009888, .val = 0x0258004B },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x04386080 },
         { .reg = 0x00009888, .val = 0x0638404B },
         { .reg = 0x00009888, .val = 0x02384000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A380000 },
         { .reg = 0x00009888, .val = 0x0C380000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0CDC25C1 },
         { .reg = 0x00009888, .val = 0x0ADCC000 },
         { .reg = 0x00009888, .val = 0x0AD825C1 },
         { .reg = 0x00009888, .val = 0x18DB4000 },
         { .reg = 0x00009888, .val = 0x1ADB0001 },
         { .reg = 0x00009888, .val = 0x0E9F8000 },
         { .reg = 0x00009888, .val = 0x109F02AA },
         { .reg = 0x00009888, .val = 0x0EB825C1 },
         { .reg = 0x00009888, .val = 0x18B80154 },
         { .reg = 0x00009888, .val = 0x0AB9A000 },
         { .reg = 0x00009888, .val = 0x0CB9A000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x0D88C000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258BAA05 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x198C5400 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x098DC000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x098E05C0 },
         { .reg = 0x00009888, .val = 0x058E0000 },
         { .reg = 0x00009888, .val = 0x198F0020 },
         { .reg = 0x00009888, .val = 0x2185AA0A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x19835000 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x19808000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x51800040 },
         { .reg = 0x00009888, .val = 0x43800400 },
         { .reg = 0x00009888, .val = 0x45800800 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800C62 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F801042 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x418014A4 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FFF7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader12_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader12AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader11_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader11AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__non_sampler_shader10_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader10AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "78f52900-5317-4417-8067-966fbf215a1f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

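   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only once.
    * The description lives in the ralloc'ed `query` object allocated above
    * (not in a global variable); the data_size check below presumably keeps
    * an already-described query from being filled in a second time. */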

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x10BF03DA },
         { .reg = 0x00009888, .val = 0x14BF0001 },
         { .reg = 0x00009888, .val = 0x12980340 },
         { .reg = 0x00009888, .val = 0x12990340 },
         { .reg = 0x00009888, .val = 0x0CBF1187 },
         { .reg = 0x00009888, .val = 0x0EBF1205 },
         { .reg = 0x00009888, .val = 0x00BF0500 },
         { .reg = 0x00009888, .val = 0x02BF042B },
         { .reg = 0x00009888, .val = 0x04BF002C },
         { .reg = 0x00009888, .val = 0x0CDAC000 },
         { .reg = 0x00009888, .val = 0x0EDAC000 },
         { .reg = 0x00009888, .val = 0x00DA8000 },
         { .reg = 0x00009888, .val = 0x02DAC000 },
         { .reg = 0x00009888, .val = 0x04DA4000 },
         { .reg = 0x00009888, .val = 0x04983400 },
         { .reg = 0x00009888, .val = 0x10980000 },
         { .reg = 0x00009888, .val = 0x06990034 },
         { .reg = 0x00009888, .val = 0x10990000 },
         { .reg = 0x00009888, .val = 0x0C9DC000 },
         { .reg = 0x00009888, .val = 0x0E9DC000 },
         { .reg = 0x00009888, .val = 0x009D8000 },
         { .reg = 0x00009888, .val = 0x029DC000 },
         { .reg = 0x00009888, .val = 0x049D4000 },
         { .reg = 0x00009888, .val = 0x109F02A8 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00BA },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x0CB95000 },
         { .reg = 0x00009888, .val = 0x0EB95000 },
         { .reg = 0x00009888, .val = 0x00B94000 },
         { .reg = 0x00009888, .val = 0x02B95000 },
         { .reg = 0x00009888, .val = 0x04B91000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x0CBA4000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x258B800A },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x198C4000 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x47800000 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800060 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_1__l31_bank0_stalled__read;
         counter->name = "Slice1 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_1__l31_bank1_stalled__read;
         counter->name = "Slice1 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_1__l31_bank1_active__read;
         counter->name = "Slice1 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L31Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_1__l31_bank0_active__read;
         counter->name = "Slice1 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L31Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "0db4fe4a-2648-4064-bd6a-89c06d0c27b3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x103F03DA },
         { .reg = 0x00009888, .val = 0x143F0001 },
         { .reg = 0x00009888, .val = 0x12180340 },
         { .reg = 0x00009888, .val = 0x12190340 },
         { .reg = 0x00009888, .val = 0x0C3F1187 },
         { .reg = 0x00009888, .val = 0x0E3F1205 },
         { .reg = 0x00009888, .val = 0x003F0500 },
         { .reg = 0x00009888, .val = 0x023F042B },
         { .reg = 0x00009888, .val = 0x043F002C },
         { .reg = 0x00009888, .val = 0x0C5AC000 },
         { .reg = 0x00009888, .val = 0x0E5AC000 },
         { .reg = 0x00009888, .val = 0x005A8000 },
         { .reg = 0x00009888, .val = 0x025AC000 },
         { .reg = 0x00009888, .val = 0x045A4000 },
         { .reg = 0x00009888, .val = 0x04183400 },
         { .reg = 0x00009888, .val = 0x10180000 },
         { .reg = 0x00009888, .val = 0x06190034 },
         { .reg = 0x00009888, .val = 0x10190000 },
         { .reg = 0x00009888, .val = 0x0C1DC000 },
         { .reg = 0x00009888, .val = 0x0E1DC000 },
         { .reg = 0x00009888, .val = 0x001D8000 },
         { .reg = 0x00009888, .val = 0x021DC000 },
         { .reg = 0x00009888, .val = 0x041D4000 },
         { .reg = 0x00009888, .val = 0x101F02A8 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00BA },
         { .reg = 0x00009888, .val = 0x0C388000 },
         { .reg = 0x00009888, .val = 0x0C395000 },
         { .reg = 0x00009888, .val = 0x0E395000 },
         { .reg = 0x00009888, .val = 0x00394000 },
         { .reg = 0x00009888, .val = 0x02395000 },
         { .reg = 0x00009888, .val = 0x04391000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x0C3A4000 },
         { .reg = 0x00009888, .val = 0x1B8AA800 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258B4005 },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800000 },
         { .reg = 0x00009888, .val = 0x47800000 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800060 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_2__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_2__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_2__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_2__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "0de6539e-33b4-4aee-83c4-9ab45a579b94";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x121B0340 },
         { .reg = 0x00009888, .val = 0x103F0274 },
         { .reg = 0x00009888, .val = 0x123F0000 },
         { .reg = 0x00009888, .val = 0x129B0340 },
         { .reg = 0x00009888, .val = 0x10BF0274 },
         { .reg = 0x00009888, .val = 0x12BF0000 },
         { .reg = 0x00009888, .val = 0x041B3400 },
         { .reg = 0x00009888, .val = 0x101B0000 },
         { .reg = 0x00009888, .val = 0x045C8000 },
         { .reg = 0x00009888, .val = 0x0A3D4000 },
         { .reg = 0x00009888, .val = 0x003F0080 },
         { .reg = 0x00009888, .val = 0x023F0793 },
         { .reg = 0x00009888, .val = 0x043F0014 },
         { .reg = 0x00009888, .val = 0x04588000 },
         { .reg = 0x00009888, .val = 0x005A8000 },
         { .reg = 0x00009888, .val = 0x025AC000 },
         { .reg = 0x00009888, .val = 0x045A4000 },
         { .reg = 0x00009888, .val = 0x0A5B4000 },
         { .reg = 0x00009888, .val = 0x001D8000 },
         { .reg = 0x00009888, .val = 0x021DC000 },
         { .reg = 0x00009888, .val = 0x041D4000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F002A },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x00394000 },
         { .reg = 0x00009888, .val = 0x02395000 },
         { .reg = 0x00009888, .val = 0x04399000 },
         { .reg = 0x00009888, .val = 0x069B0034 },
         { .reg = 0x00009888, .val = 0x109B0000 },
         { .reg = 0x00009888, .val = 0x06DC4000 },
         { .reg = 0x00009888, .val = 0x0CBD4000 },
         { .reg = 0x00009888, .val = 0x0CBF0981 },
         { .reg = 0x00009888, .val = 0x0EBF0A0F },
         { .reg = 0x00009888, .val = 0x06D84000 },
         { .reg = 0x00009888, .val = 0x0CDAC000 },
         { .reg = 0x00009888, .val = 0x0EDAC000 },
         { .reg = 0x00009888, .val = 0x0CDB4000 },
         { .reg = 0x00009888, .val = 0x0C9DC000 },
         { .reg = 0x00009888, .val = 0x0E9DC000 },
         { .reg = 0x00009888, .val = 0x109F02A8 },
         { .reg = 0x00009888, .val = 0x0E9F0080 },
         { .reg = 0x00009888, .val = 0x0CB84000 },
         { .reg = 0x00009888, .val = 0x0CB95000 },
         { .reg = 0x00009888, .val = 0x0EB95000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x258B8009 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x198C4000 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800C00 },
         { .reg = 0x00009888, .val = 0x47800C63 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F8014A5 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800045 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_3__l31_bank3_stalled__read;
         counter->name = "Slice1 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_3__l31_bank3_active__read;
         counter->name = "Slice1 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L31Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_4";
   query->symbol_name = "L3_4";
   query->guid = "be74c9d6-268f-4d7f-b2e8-8d91d4441883";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

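   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query within a
    * global variable which only needs to be initialized once.
    *
    * NOTE(review): in this function the query is allocated with rzalloc()
    * rather than kept in a global variable, so the data_size check below
    * presumably always passes on a fresh allocation -- confirm against the
    * generator (gen_perf.py) before relying on either behavior. */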

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x121A0340 },
         { .reg = 0x00009888, .val = 0x103F0017 },
         { .reg = 0x00009888, .val = 0x123F0020 },
         { .reg = 0x00009888, .val = 0x129A0340 },
         { .reg = 0x00009888, .val = 0x10BF0017 },
         { .reg = 0x00009888, .val = 0x12BF0020 },
         { .reg = 0x00009888, .val = 0x041A3400 },
         { .reg = 0x00009888, .val = 0x101A0000 },
         { .reg = 0x00009888, .val = 0x043B8000 },
         { .reg = 0x00009888, .val = 0x0A3E0010 },
         { .reg = 0x00009888, .val = 0x003F0200 },
         { .reg = 0x00009888, .val = 0x023F0113 },
         { .reg = 0x00009888, .val = 0x043F0014 },
         { .reg = 0x00009888, .val = 0x02592000 },
         { .reg = 0x00009888, .val = 0x005A8000 },
         { .reg = 0x00009888, .val = 0x025AC000 },
         { .reg = 0x00009888, .val = 0x045A4000 },
         { .reg = 0x00009888, .val = 0x0A1C8000 },
         { .reg = 0x00009888, .val = 0x001D8000 },
         { .reg = 0x00009888, .val = 0x021DC000 },
         { .reg = 0x00009888, .val = 0x041D4000 },
         { .reg = 0x00009888, .val = 0x0A1E8000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F001A },
         { .reg = 0x00009888, .val = 0x00394000 },
         { .reg = 0x00009888, .val = 0x02395000 },
         { .reg = 0x00009888, .val = 0x04391000 },
         { .reg = 0x00009888, .val = 0x069A0034 },
         { .reg = 0x00009888, .val = 0x109A0000 },
         { .reg = 0x00009888, .val = 0x06BB4000 },
         { .reg = 0x00009888, .val = 0x0ABE0040 },
         { .reg = 0x00009888, .val = 0x0CBF0984 },
         { .reg = 0x00009888, .val = 0x0EBF0A02 },
         { .reg = 0x00009888, .val = 0x02D94000 },
         { .reg = 0x00009888, .val = 0x0CDAC000 },
         { .reg = 0x00009888, .val = 0x0EDAC000 },
         { .reg = 0x00009888, .val = 0x0C9C0400 },
         { .reg = 0x00009888, .val = 0x0C9DC000 },
         { .reg = 0x00009888, .val = 0x0E9DC000 },
         { .reg = 0x00009888, .val = 0x0C9E0400 },
         { .reg = 0x00009888, .val = 0x109F02A8 },
         { .reg = 0x00009888, .val = 0x0E9F0040 },
         { .reg = 0x00009888, .val = 0x0CB95000 },
         { .reg = 0x00009888, .val = 0x0EB95000 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x258B8009 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x198C4000 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185800A },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x1B830154 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x45800800 },
         { .reg = 0x00009888, .val = 0x47800842 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F801084 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800044 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__l3_4__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__l3_4__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_4__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_4__l31_bank2_stalled__read;
         counter->name = "Slice1 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice1 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L31Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_4__l31_bank2_active__read;
         counter->name = "Slice1 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice1 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L31Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_4__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__l3_4__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "31bc1ff3-8d09-4497-9d79-59c4f943b502";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 46);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x143B000E },
         { .reg = 0x00009888, .val = 0x043C55C0 },
         { .reg = 0x00009888, .val = 0x0A1E0280 },
         { .reg = 0x00009888, .val = 0x0C1E0408 },
         { .reg = 0x00009888, .val = 0x10390000 },
         { .reg = 0x00009888, .val = 0x12397A1F },
         { .reg = 0x00009888, .val = 0x14BB000E },
         { .reg = 0x00009888, .val = 0x04BC5000 },
         { .reg = 0x00009888, .val = 0x0A9E0296 },
         { .reg = 0x00009888, .val = 0x0C9E0008 },
         { .reg = 0x00009888, .val = 0x10B90000 },
         { .reg = 0x00009888, .val = 0x12B97A1F },
         { .reg = 0x00009888, .val = 0x063B0042 },
         { .reg = 0x00009888, .val = 0x103B0000 },
         { .reg = 0x00009888, .val = 0x083C0000 },
         { .reg = 0x00009888, .val = 0x0A3E0040 },
         { .reg = 0x00009888, .val = 0x043F8000 },
         { .reg = 0x00009888, .val = 0x02594000 },
         { .reg = 0x00009888, .val = 0x045A8000 },
         { .reg = 0x00009888, .val = 0x0C1C0400 },
         { .reg = 0x00009888, .val = 0x041D8000 },
         { .reg = 0x00009888, .val = 0x081E02C0 },
         { .reg = 0x00009888, .val = 0x0E1E0000 },
         { .reg = 0x00009888, .val = 0x0C1FA800 },
         { .reg = 0x00009888, .val = 0x0E1F0260 },
         { .reg = 0x00009888, .val = 0x101F0014 },
         { .reg = 0x00009888, .val = 0x003905E0 },
         { .reg = 0x00009888, .val = 0x06390BC0 },
         { .reg = 0x00009888, .val = 0x02390018 },
         { .reg = 0x00009888, .val = 0x04394000 },
         { .reg = 0x00009888, .val = 0x04BB0042 },
         { .reg = 0x00009888, .val = 0x10BB0000 },
         { .reg = 0x00009888, .val = 0x02BC05C0 },
         { .reg = 0x00009888, .val = 0x08BC0000 },
         { .reg = 0x00009888, .val = 0x0ABE0004 },
         { .reg = 0x00009888, .val = 0x02BF8000 },
         { .reg = 0x00009888, .val = 0x02D91000 },
         { .reg = 0x00009888, .val = 0x02DA8000 },
         { .reg = 0x00009888, .val = 0x089C8000 },
         { .reg = 0x00009888, .val = 0x029D8000 },
         { .reg = 0x00009888, .val = 0x089E8000 },
         { .reg = 0x00009888, .val = 0x0E9E0000 },
         { .reg = 0x00009888, .val = 0x0E9FA806 },
         { .reg = 0x00009888, .val = 0x109F0142 },
         { .reg = 0x00009888, .val = 0x08B90617 },
         { .reg = 0x00009888, .val = 0x0AB90BE0 },
         { .reg = 0x00009888, .val = 0x02B94000 },
         { .reg = 0x00009888, .val = 0x0D88F000 },
         { .reg = 0x00009888, .val = 0x0F88000C },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x018A8000 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x1B8A2800 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x238B52A0 },
         { .reg = 0x00009888, .val = 0x258B6A95 },
         { .reg = 0x00009888, .val = 0x278B0029 },
         { .reg = 0x00009888, .val = 0x178C2000 },
         { .reg = 0x00009888, .val = 0x198C1500 },
         { .reg = 0x00009888, .val = 0x1B8C0014 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x098DA000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x038D8000 },
         { .reg = 0x00009888, .val = 0x058D2000 },
         { .reg = 0x00009888, .val = 0x1F85AA80 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x01834000 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0184C000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1180C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4D800444 },
         { .reg = 0x00009888, .val = 0x3D800000 },
         { .reg = 0x00009888, .val = 0x4F804000 },
         { .reg = 0x00009888, .val = 0x43801080 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800084 },
         { .reg = 0x00009888, .val = 0x53800044 },
         { .reg = 0x00009888, .val = 0x47801080 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x3F800000 },
         { .reg = 0x00009888, .val = 0x41800840 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00006000 },
         { .reg = 0x00002774, .val = 0x0000F3FF },
         { .reg = 0x00002778, .val = 0x00001800 },
         { .reg = 0x0000277C, .val = 0x0000FCFF },
         { .reg = 0x00002780, .val = 0x00000600 },
         { .reg = 0x00002784, .val = 0x0000FF3F },
         { .reg = 0x00002788, .val = 0x00000180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000060 },
         { .reg = 0x00002794, .val = 0x0000FFF3 },
         { .reg = 0x00002798, .val = 0x00000018 },
         { .reg = 0x0000279C, .val = 0x0000FFFC },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__pixel_data1_ready__read;
         counter->name = "Slice1 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice1 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData1Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied)) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__rasterizer1_input_available__read;
         counter->name = "Slice1 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice1 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer1InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__rasterizer1_output_ready__read;
         counter->name = "Slice1 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice1 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer1OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__pixel_values1_ready__read;
         counter->name = "Slice1 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice1 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues1Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.slice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__ps_output1_available__read;
         counter->name = "Slice1 PS Output Available";
         counter->desc = "The percentage of time in which slice1 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput1Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "05450fec-bb5c-4b0a-8995-e03631168a34";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x18921400 },
         { .reg = 0x00009888, .val = 0x149500AB },
         { .reg = 0x00009888, .val = 0x18B21400 },
         { .reg = 0x00009888, .val = 0x14B500AB },
         { .reg = 0x00009888, .val = 0x18D21400 },
         { .reg = 0x00009888, .val = 0x14D500AB },
         { .reg = 0x00009888, .val = 0x0CDC8000 },
         { .reg = 0x00009888, .val = 0x0EDC4000 },
         { .reg = 0x00009888, .val = 0x02DCC000 },
         { .reg = 0x00009888, .val = 0x04DCC000 },
         { .reg = 0x00009888, .val = 0x1ABD00A0 },
         { .reg = 0x00009888, .val = 0x0ABD8000 },
         { .reg = 0x00009888, .val = 0x0CD88000 },
         { .reg = 0x00009888, .val = 0x0ED84000 },
         { .reg = 0x00009888, .val = 0x04D88000 },
         { .reg = 0x00009888, .val = 0x1ADB0050 },
         { .reg = 0x00009888, .val = 0x04DB8000 },
         { .reg = 0x00009888, .val = 0x06DB8000 },
         { .reg = 0x00009888, .val = 0x08DB8000 },
         { .reg = 0x00009888, .val = 0x0ADB4000 },
         { .reg = 0x00009888, .val = 0x109F02A0 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00AA },
         { .reg = 0x00009888, .val = 0x18B82500 },
         { .reg = 0x00009888, .val = 0x02B88000 },
         { .reg = 0x00009888, .val = 0x04B84000 },
         { .reg = 0x00009888, .val = 0x06B84000 },
         { .reg = 0x00009888, .val = 0x08B84000 },
         { .reg = 0x00009888, .val = 0x0AB84000 },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x0CB98000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x00B98000 },
         { .reg = 0x00009888, .val = 0x02B9A000 },
         { .reg = 0x00009888, .val = 0x04B9A000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x1ABA0200 },
         { .reg = 0x00009888, .val = 0x02BA8000 },
         { .reg = 0x00009888, .val = 0x0CBA8000 },
         { .reg = 0x00009888, .val = 0x04908000 },
         { .reg = 0x00009888, .val = 0x04918000 },
         { .reg = 0x00009888, .val = 0x04927300 },
         { .reg = 0x00009888, .val = 0x10920000 },
         { .reg = 0x00009888, .val = 0x1893000A },
         { .reg = 0x00009888, .val = 0x0A934000 },
         { .reg = 0x00009888, .val = 0x0A946000 },
         { .reg = 0x00009888, .val = 0x0C959000 },
         { .reg = 0x00009888, .val = 0x0E950098 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x04B04000 },
         { .reg = 0x00009888, .val = 0x04B14000 },
         { .reg = 0x00009888, .val = 0x04B20073 },
         { .reg = 0x00009888, .val = 0x10B20000 },
         { .reg = 0x00009888, .val = 0x04B38000 },
         { .reg = 0x00009888, .val = 0x06B38000 },
         { .reg = 0x00009888, .val = 0x08B34000 },
         { .reg = 0x00009888, .val = 0x04B4C000 },
         { .reg = 0x00009888, .val = 0x02B59890 },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x06D04000 },
         { .reg = 0x00009888, .val = 0x06D14000 },
         { .reg = 0x00009888, .val = 0x06D20073 },
         { .reg = 0x00009888, .val = 0x10D20000 },
         { .reg = 0x00009888, .val = 0x18D30020 },
         { .reg = 0x00009888, .val = 0x02D38000 },
         { .reg = 0x00009888, .val = 0x0CD34000 },
         { .reg = 0x00009888, .val = 0x0AD48000 },
         { .reg = 0x00009888, .val = 0x04D42000 },
         { .reg = 0x00009888, .val = 0x0ED59000 },
         { .reg = 0x00009888, .val = 0x00D59800 },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x0F88000E },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x258B000A },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x0D8D8000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x2185000A },
         { .reg = 0x00009888, .val = 0x1B830150 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D848000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D808000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801021 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800C64 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800C02 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler11_input_available__read;
         counter->name = "Slice1 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler11InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler12_input_available__read;
         counter->name = "Slice1 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler12InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler10_input_available__read;
         counter->name = "Slice1 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice1 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler10InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler12_output_ready__read;
         counter->name = "Slice1 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler12OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler10_output_ready__read;
         counter->name = "Slice1 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler10OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__sampler11_output_ready__read;
         counter->name = "Slice1 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice1 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler11OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler_2";
   query->symbol_name = "Sampler_2";
   query->guid = "883c8a56-8767-4de8-beec-a99462c6674b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x18121400 },
         { .reg = 0x00009888, .val = 0x141500AB },
         { .reg = 0x00009888, .val = 0x18321400 },
         { .reg = 0x00009888, .val = 0x143500AB },
         { .reg = 0x00009888, .val = 0x18521400 },
         { .reg = 0x00009888, .val = 0x145500AB },
         { .reg = 0x00009888, .val = 0x0C5C8000 },
         { .reg = 0x00009888, .val = 0x0E5C4000 },
         { .reg = 0x00009888, .val = 0x025CC000 },
         { .reg = 0x00009888, .val = 0x045CC000 },
         { .reg = 0x00009888, .val = 0x1A3D00A0 },
         { .reg = 0x00009888, .val = 0x0A3D8000 },
         { .reg = 0x00009888, .val = 0x0C588000 },
         { .reg = 0x00009888, .val = 0x0E584000 },
         { .reg = 0x00009888, .val = 0x04588000 },
         { .reg = 0x00009888, .val = 0x1A5B0050 },
         { .reg = 0x00009888, .val = 0x045B8000 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B8000 },
         { .reg = 0x00009888, .val = 0x0A5B4000 },
         { .reg = 0x00009888, .val = 0x101F02A0 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x18382500 },
         { .reg = 0x00009888, .val = 0x02388000 },
         { .reg = 0x00009888, .val = 0x04384000 },
         { .reg = 0x00009888, .val = 0x06384000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x0C388000 },
         { .reg = 0x00009888, .val = 0x0C398000 },
         { .reg = 0x00009888, .val = 0x0E39A000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x1A3A0200 },
         { .reg = 0x00009888, .val = 0x023A8000 },
         { .reg = 0x00009888, .val = 0x0C3A8000 },
         { .reg = 0x00009888, .val = 0x04108000 },
         { .reg = 0x00009888, .val = 0x04118000 },
         { .reg = 0x00009888, .val = 0x04127300 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x1813000A },
         { .reg = 0x00009888, .val = 0x0A134000 },
         { .reg = 0x00009888, .val = 0x0A146000 },
         { .reg = 0x00009888, .val = 0x0C159000 },
         { .reg = 0x00009888, .val = 0x0E150098 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04304000 },
         { .reg = 0x00009888, .val = 0x04314000 },
         { .reg = 0x00009888, .val = 0x04320073 },
         { .reg = 0x00009888, .val = 0x10320000 },
         { .reg = 0x00009888, .val = 0x04338000 },
         { .reg = 0x00009888, .val = 0x06338000 },
         { .reg = 0x00009888, .val = 0x08334000 },
         { .reg = 0x00009888, .val = 0x0434C000 },
         { .reg = 0x00009888, .val = 0x02359890 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x06504000 },
         { .reg = 0x00009888, .val = 0x06514000 },
         { .reg = 0x00009888, .val = 0x06520073 },
         { .reg = 0x00009888, .val = 0x10520000 },
         { .reg = 0x00009888, .val = 0x18530020 },
         { .reg = 0x00009888, .val = 0x02538000 },
         { .reg = 0x00009888, .val = 0x0C534000 },
         { .reg = 0x00009888, .val = 0x0A548000 },
         { .reg = 0x00009888, .val = 0x04542000 },
         { .reg = 0x00009888, .val = 0x0E559000 },
         { .reg = 0x00009888, .val = 0x00559800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x1B8AA000 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x258B0005 },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x2185000A },
         { .reg = 0x00009888, .val = 0x1B830150 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0D848000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x07844000 },
         { .reg = 0x00009888, .val = 0x1D808000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x17804000 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47801021 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800C64 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x41800C02 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__sampler_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__sampler_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__sampler_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "eb97e523-d9ac-48a9-b7f8-9cc909ddbf14";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 48);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x16154D60 },
         { .reg = 0x00009888, .val = 0x16352E60 },
         { .reg = 0x00009888, .val = 0x16554D60 },
         { .reg = 0x00009888, .val = 0x16950000 },
         { .reg = 0x00009888, .val = 0x16B50000 },
         { .reg = 0x00009888, .val = 0x16D50000 },
         { .reg = 0x00009888, .val = 0x005C8000 },
         { .reg = 0x00009888, .val = 0x045CC000 },
         { .reg = 0x00009888, .val = 0x065C4000 },
         { .reg = 0x00009888, .val = 0x083D8000 },
         { .reg = 0x00009888, .val = 0x0A3D8000 },
         { .reg = 0x00009888, .val = 0x0458C000 },
         { .reg = 0x00009888, .val = 0x025B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5B4000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0C1FA000 },
         { .reg = 0x00009888, .val = 0x0E1F00AA },
         { .reg = 0x00009888, .val = 0x02384000 },
         { .reg = 0x00009888, .val = 0x04388000 },
         { .reg = 0x00009888, .val = 0x06388000 },
         { .reg = 0x00009888, .val = 0x08384000 },
         { .reg = 0x00009888, .val = 0x0A384000 },
         { .reg = 0x00009888, .val = 0x0C384000 },
         { .reg = 0x00009888, .val = 0x00398000 },
         { .reg = 0x00009888, .val = 0x0239A000 },
         { .reg = 0x00009888, .val = 0x0439A000 },
         { .reg = 0x00009888, .val = 0x06392000 },
         { .reg = 0x00009888, .val = 0x043A8000 },
         { .reg = 0x00009888, .val = 0x063A8000 },
         { .reg = 0x00009888, .val = 0x08138000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x06143000 },
         { .reg = 0x00009888, .val = 0x0415CFC7 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x02338000 },
         { .reg = 0x00009888, .val = 0x0C338000 },
         { .reg = 0x00009888, .val = 0x04342000 },
         { .reg = 0x00009888, .val = 0x06344000 },
         { .reg = 0x00009888, .val = 0x0035C700 },
         { .reg = 0x00009888, .val = 0x063500CF },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04538000 },
         { .reg = 0x00009888, .val = 0x06538000 },
         { .reg = 0x00009888, .val = 0x0454C000 },
         { .reg = 0x00009888, .val = 0x0255CFC7 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06DC8000 },
         { .reg = 0x00009888, .val = 0x08DC4000 },
         { .reg = 0x00009888, .val = 0x0CDCC000 },
         { .reg = 0x00009888, .val = 0x0EDCC000 },
         { .reg = 0x00009888, .val = 0x1ABD00A8 },
         { .reg = 0x00009888, .val = 0x0CD8C000 },
         { .reg = 0x00009888, .val = 0x0ED84000 },
         { .reg = 0x00009888, .val = 0x0EDB8000 },
         { .reg = 0x00009888, .val = 0x18DB0800 },
         { .reg = 0x00009888, .val = 0x1ADB0254 },
         { .reg = 0x00009888, .val = 0x0E9FAA00 },
         { .reg = 0x00009888, .val = 0x109F02AA },
         { .reg = 0x00009888, .val = 0x0EB84000 },
         { .reg = 0x00009888, .val = 0x16B84000 },
         { .reg = 0x00009888, .val = 0x18B8156A },
         { .reg = 0x00009888, .val = 0x06B98000 },
         { .reg = 0x00009888, .val = 0x08B9A000 },
         { .reg = 0x00009888, .val = 0x0AB9A000 },
         { .reg = 0x00009888, .val = 0x0CB9A000 },
         { .reg = 0x00009888, .val = 0x0EB9A000 },
         { .reg = 0x00009888, .val = 0x18BAA000 },
         { .reg = 0x00009888, .val = 0x1ABA0002 },
         { .reg = 0x00009888, .val = 0x16934000 },
         { .reg = 0x00009888, .val = 0x1893000A },
         { .reg = 0x00009888, .val = 0x0A947000 },
         { .reg = 0x00009888, .val = 0x0C95C5C1 },
         { .reg = 0x00009888, .val = 0x0E9500C3 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x0EB38000 },
         { .reg = 0x00009888, .val = 0x16B30040 },
         { .reg = 0x00009888, .val = 0x18B30020 },
         { .reg = 0x00009888, .val = 0x06B48000 },
         { .reg = 0x00009888, .val = 0x08B41000 },
         { .reg = 0x00009888, .val = 0x0AB48000 },
         { .reg = 0x00009888, .val = 0x06B5C500 },
         { .reg = 0x00009888, .val = 0x08B500C3 },
         { .reg = 0x00009888, .val = 0x0EB5C100 },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x16D31500 },
         { .reg = 0x00009888, .val = 0x08D4E000 },
         { .reg = 0x00009888, .val = 0x08D5C100 },
         { .reg = 0x00009888, .val = 0x0AD5C3C5 },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x0D88F800 },
         { .reg = 0x00009888, .val = 0x0F88000F },
         { .reg = 0x00009888, .val = 0x038A8000 },
         { .reg = 0x00009888, .val = 0x058A8000 },
         { .reg = 0x00009888, .val = 0x078A8000 },
         { .reg = 0x00009888, .val = 0x098A8000 },
         { .reg = 0x00009888, .val = 0x0B8A8000 },
         { .reg = 0x00009888, .val = 0x0D8A8000 },
         { .reg = 0x00009888, .val = 0x258BAAA5 },
         { .reg = 0x00009888, .val = 0x278B002A },
         { .reg = 0x00009888, .val = 0x238B2A80 },
         { .reg = 0x00009888, .val = 0x0F8C4000 },
         { .reg = 0x00009888, .val = 0x178C2000 },
         { .reg = 0x00009888, .val = 0x198C5500 },
         { .reg = 0x00009888, .val = 0x1B8C0015 },
         { .reg = 0x00009888, .val = 0x078D8000 },
         { .reg = 0x00009888, .val = 0x098DA000 },
         { .reg = 0x00009888, .val = 0x0B8DA000 },
         { .reg = 0x00009888, .val = 0x0D8DA000 },
         { .reg = 0x00009888, .val = 0x0F8DA000 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800C42 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45800063 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x47800800 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F8014A4 },
         { .reg = 0x00009888, .val = 0x41801042 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x0000FE7F },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFBF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFF7 },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FFF9 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__non_ps_thread11_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread11ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__ps_thread10_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread10ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__non_ps_thread10_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread10ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__ps_thread12_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread12ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__non_ps_thread12_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice1 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice1 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread12ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__ps_thread11_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice1 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice1 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread11ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "dff6158e-8409-452c-be76-91c9946330bd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 48);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x16150000 },
         { .reg = 0x00009888, .val = 0x16350000 },
         { .reg = 0x00009888, .val = 0x16550000 },
         { .reg = 0x00009888, .val = 0x16952E60 },
         { .reg = 0x00009888, .val = 0x16B54D60 },
         { .reg = 0x00009888, .val = 0x16D52E60 },
         { .reg = 0x00009888, .val = 0x065C8000 },
         { .reg = 0x00009888, .val = 0x085CC000 },
         { .reg = 0x00009888, .val = 0x0A5CC000 },
         { .reg = 0x00009888, .val = 0x0C5C4000 },
         { .reg = 0x00009888, .val = 0x0E3D8000 },
         { .reg = 0x00009888, .val = 0x183DA000 },
         { .reg = 0x00009888, .val = 0x06588000 },
         { .reg = 0x00009888, .val = 0x08588000 },
         { .reg = 0x00009888, .val = 0x0A584000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x185B5800 },
         { .reg = 0x00009888, .val = 0x1A5B000A },
         { .reg = 0x00009888, .val = 0x0E1FAA00 },
         { .reg = 0x00009888, .val = 0x101F02AA },
         { .reg = 0x00009888, .val = 0x0E384000 },
         { .reg = 0x00009888, .val = 0x16384000 },
         { .reg = 0x00009888, .val = 0x18382A55 },
         { .reg = 0x00009888, .val = 0x06398000 },
         { .reg = 0x00009888, .val = 0x0839A000 },
         { .reg = 0x00009888, .val = 0x0A39A000 },
         { .reg = 0x00009888, .val = 0x0C39A000 },
         { .reg = 0x00009888, .val = 0x0E39A000 },
         { .reg = 0x00009888, .val = 0x1A3A02A0 },
         { .reg = 0x00009888, .val = 0x0E138000 },
         { .reg = 0x00009888, .val = 0x16130500 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x08146000 },
         { .reg = 0x00009888, .val = 0x0615C100 },
         { .reg = 0x00009888, .val = 0x0815C500 },
         { .reg = 0x00009888, .val = 0x0A1500C3 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x16335040 },
         { .reg = 0x00009888, .val = 0x08349000 },
         { .reg = 0x00009888, .val = 0x0A341000 },
         { .reg = 0x00009888, .val = 0x083500C1 },
         { .reg = 0x00009888, .val = 0x0A35C500 },
         { .reg = 0x00009888, .val = 0x0C3500C3 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x1853002A },
         { .reg = 0x00009888, .val = 0x0A54E000 },
         { .reg = 0x00009888, .val = 0x0C55C500 },
         { .reg = 0x00009888, .val = 0x0E55C1C3 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x00DC8000 },
         { .reg = 0x00009888, .val = 0x02DCC000 },
         { .reg = 0x00009888, .val = 0x04DC4000 },
         { .reg = 0x00009888, .val = 0x04BD8000 },
         { .reg = 0x00009888, .val = 0x06BD8000 },
         { .reg = 0x00009888, .val = 0x02D8C000 },
         { .reg = 0x00009888, .val = 0x02DB8000 },
         { .reg = 0x00009888, .val = 0x04DB4000 },
         { .reg = 0x00009888, .val = 0x06DB4000 },
         { .reg = 0x00009888, .val = 0x08DB8000 },
         { .reg = 0x00009888, .val = 0x0C9FA000 },
         { .reg = 0x00009888, .val = 0x0E9F00AA },
         { .reg = 0x00009888, .val = 0x02B84000 },
         { .reg = 0x00009888, .val = 0x04B84000 },
         { .reg = 0x00009888, .val = 0x06B84000 },
         { .reg = 0x00009888, .val = 0x08B84000 },
         { .reg = 0x00009888, .val = 0x0AB88000 },
         { .reg = 0x00009888, .val = 0x0CB88000 },
         { .reg = 0x00009888, .val = 0x00B98000 },
         { .reg = 0x00009888, .val = 0x02B9A000 },
         { .reg = 0x00009888, .val = 0x04B9A000 },
         { .reg = 0x00009888, .val = 0x06B92000 },
         { .reg = 0x00009888, .val = 0x0ABA8000 },
         { .reg = 0x00009888, .val = 0x0CBA8000 },
         { .reg = 0x00009888, .val = 0x04938000 },
         { .reg = 0x00009888, .val = 0x06938000 },
         { .reg = 0x00009888, .val = 0x0494C000 },
         { .reg = 0x00009888, .val = 0x0295CFC7 },
         { .reg = 0x00009888, .val = 0x10950000 },
         { .reg = 0x00009888, .val = 0x02B38000 },
         { .reg = 0x00009888, .val = 0x08B38000 },
         { .reg = 0x00009888, .val = 0x04B42000 },
         { .reg = 0x00009888, .val = 0x06B41000 },
         { .reg = 0x00009888, .val = 0x00B5C700 },
         { .reg = 0x00009888, .val = 0x04B500CF },
         { .reg = 0x00009888, .val = 0x10B50000 },
         { .reg = 0x00009888, .val = 0x0AD38000 },
         { .reg = 0x00009888, .val = 0x0CD38000 },
         { .reg = 0x00009888, .val = 0x06D46000 },
         { .reg = 0x00009888, .val = 0x04D5C700 },
         { .reg = 0x00009888, .val = 0x06D500CF },
         { .reg = 0x00009888, .val = 0x10D50000 },
         { .reg = 0x00009888, .val = 0x03888000 },
         { .reg = 0x00009888, .val = 0x05888000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x09888000 },
         { .reg = 0x00009888, .val = 0x0B888000 },
         { .reg = 0x00009888, .val = 0x0D880400 },
         { .reg = 0x00009888, .val = 0x0F8A8000 },
         { .reg = 0x00009888, .val = 0x198A8000 },
         { .reg = 0x00009888, .val = 0x1B8AAAA0 },
         { .reg = 0x00009888, .val = 0x1D8A0002 },
         { .reg = 0x00009888, .val = 0x258B555A },
         { .reg = 0x00009888, .val = 0x278B0015 },
         { .reg = 0x00009888, .val = 0x238B5500 },
         { .reg = 0x00009888, .val = 0x038C4000 },
         { .reg = 0x00009888, .val = 0x058C4000 },
         { .reg = 0x00009888, .val = 0x078C4000 },
         { .reg = 0x00009888, .val = 0x098C4000 },
         { .reg = 0x00009888, .val = 0x0B8C4000 },
         { .reg = 0x00009888, .val = 0x0D8C4000 },
         { .reg = 0x00009888, .val = 0x018D8000 },
         { .reg = 0x00009888, .val = 0x038DA000 },
         { .reg = 0x00009888, .val = 0x058DA000 },
         { .reg = 0x00009888, .val = 0x078D2000 },
         { .reg = 0x00009888, .val = 0x2185AAAA },
         { .reg = 0x00009888, .val = 0x2385002A },
         { .reg = 0x00009888, .val = 0x1F85AA00 },
         { .reg = 0x00009888, .val = 0x0F834000 },
         { .reg = 0x00009888, .val = 0x19835400 },
         { .reg = 0x00009888, .val = 0x1B830155 },
         { .reg = 0x00009888, .val = 0x03834000 },
         { .reg = 0x00009888, .val = 0x05834000 },
         { .reg = 0x00009888, .val = 0x07834000 },
         { .reg = 0x00009888, .val = 0x09834000 },
         { .reg = 0x00009888, .val = 0x0B834000 },
         { .reg = 0x00009888, .val = 0x0D834000 },
         { .reg = 0x00009888, .val = 0x0784C000 },
         { .reg = 0x00009888, .val = 0x0984C000 },
         { .reg = 0x00009888, .val = 0x0B84C000 },
         { .reg = 0x00009888, .val = 0x0D84C000 },
         { .reg = 0x00009888, .val = 0x0F84C000 },
         { .reg = 0x00009888, .val = 0x01848000 },
         { .reg = 0x00009888, .val = 0x0384C000 },
         { .reg = 0x00009888, .val = 0x0584C000 },
         { .reg = 0x00009888, .val = 0x1780C000 },
         { .reg = 0x00009888, .val = 0x1980C000 },
         { .reg = 0x00009888, .val = 0x1B80C000 },
         { .reg = 0x00009888, .val = 0x1D80C000 },
         { .reg = 0x00009888, .val = 0x1F80C000 },
         { .reg = 0x00009888, .val = 0x11808000 },
         { .reg = 0x00009888, .val = 0x1380C000 },
         { .reg = 0x00009888, .val = 0x1580C000 },
         { .reg = 0x00009888, .val = 0x4F800000 },
         { .reg = 0x00009888, .val = 0x43800882 },
         { .reg = 0x00009888, .val = 0x51800000 },
         { .reg = 0x00009888, .val = 0x45801082 },
         { .reg = 0x00009888, .val = 0x53800000 },
         { .reg = 0x00009888, .val = 0x478014A5 },
         { .reg = 0x00009888, .val = 0x21800000 },
         { .reg = 0x00009888, .val = 0x31800000 },
         { .reg = 0x00009888, .val = 0x4D800000 },
         { .reg = 0x00009888, .val = 0x3F800002 },
         { .reg = 0x00009888, .val = 0x41800C62 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x0000FE7F },
         { .reg = 0x00002780, .val = 0x00000000 },
         { .reg = 0x00002784, .val = 0x0000FF9F },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000FFE7 },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFFB },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FFFD },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = chv__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header12_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader12ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 0x20) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header12_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader12ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header11_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader11ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header10_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader10ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 0x8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header10_ready_port1__read;
         counter->name = "Thread Header Ready on Slice1 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader10ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 0x10) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__thread_header11_ready_port0__read;
         counter->name = "Thread Header Ready on Slice1 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice1 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader11ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = chv__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
chv_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "4a534b07-cba3-414d-8d60-874830e883aa";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009888, .val = 0x59800001 },
         { .reg = 0x00009888, .val = 0x338B0000 },
         { .reg = 0x00009888, .val = 0x258B0066 },
         { .reg = 0x00009888, .val = 0x058B0000 },
         { .reg = 0x00009888, .val = 0x038B0000 },
         { .reg = 0x00009888, .val = 0x03844000 },
         { .reg = 0x00009888, .val = 0x47800080 },
         { .reg = 0x00009888, .val = 0x57800000 },
         { .reg = 0x001823A4, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x59800000 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = chv__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = chv__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_chv(struct intel_perf_config *perf)
{
   chv_register_render_basic_counter_query(perf);
   chv_register_compute_basic_counter_query(perf);
   chv_register_render_pipe_profile_counter_query(perf);
   chv_register_hdc_and_sf_counter_query(perf);
   chv_register_l3_1_counter_query(perf);
   chv_register_l3_2_counter_query(perf);
   chv_register_l3_3_counter_query(perf);
   chv_register_l3_4_counter_query(perf);
   chv_register_rasterizer_and_pixel_backend_counter_query(perf);
   chv_register_sampler_1_counter_query(perf);
   chv_register_sampler_2_counter_query(perf);
   chv_register_tdl_1_counter_query(perf);
   chv_register_tdl_2_counter_query(perf);
   chv_register_test_oa_counter_query(perf);
}


static void
sklgt2_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "07b25942-d9fd-4fce-bd58-e29abd66b7de";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.revision >= 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x166C01E0 },
            { .reg = 0x00009888, .val = 0x12170280 },
            { .reg = 0x00009888, .val = 0x12370280 },
            { .reg = 0x00009888, .val = 0x11930317 },
            { .reg = 0x00009888, .val = 0x159303DF },
            { .reg = 0x00009888, .val = 0x3F900003 },
            { .reg = 0x00009888, .val = 0x1A4E0080 },
            { .reg = 0x00009888, .val = 0x0A6C0053 },
            { .reg = 0x00009888, .val = 0x106C0000 },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x0A1B4000 },
            { .reg = 0x00009888, .val = 0x1C1C0001 },
            { .reg = 0x00009888, .val = 0x002F1000 },
            { .reg = 0x00009888, .val = 0x042F1000 },
            { .reg = 0x00009888, .val = 0x004C4000 },
            { .reg = 0x00009888, .val = 0x0A4C8400 },
            { .reg = 0x00009888, .val = 0x000D2000 },
            { .reg = 0x00009888, .val = 0x060D8000 },
            { .reg = 0x00009888, .val = 0x080DA000 },
            { .reg = 0x00009888, .val = 0x0A0D2000 },
            { .reg = 0x00009888, .val = 0x0C0F0400 },
            { .reg = 0x00009888, .val = 0x0E0F6600 },
            { .reg = 0x00009888, .val = 0x002C8000 },
            { .reg = 0x00009888, .val = 0x162C2200 },
            { .reg = 0x00009888, .val = 0x062D8000 },
            { .reg = 0x00009888, .val = 0x082D8000 },
            { .reg = 0x00009888, .val = 0x00133000 },
            { .reg = 0x00009888, .val = 0x08133000 },
            { .reg = 0x00009888, .val = 0x00170020 },
            { .reg = 0x00009888, .val = 0x08170021 },
            { .reg = 0x00009888, .val = 0x10170000 },
            { .reg = 0x00009888, .val = 0x0633C000 },
            { .reg = 0x00009888, .val = 0x0833C000 },
            { .reg = 0x00009888, .val = 0x06370800 },
            { .reg = 0x00009888, .val = 0x08370840 },
            { .reg = 0x00009888, .val = 0x10370000 },
            { .reg = 0x00009888, .val = 0x0D933031 },
            { .reg = 0x00009888, .val = 0x0F933E3F },
            { .reg = 0x00009888, .val = 0x01933D00 },
            { .reg = 0x00009888, .val = 0x0393073C },
            { .reg = 0x00009888, .val = 0x0593000E },
            { .reg = 0x00009888, .val = 0x1D930000 },
            { .reg = 0x00009888, .val = 0x19930000 },
            { .reg = 0x00009888, .val = 0x1B930000 },
            { .reg = 0x00009888, .val = 0x1D900157 },
            { .reg = 0x00009888, .val = 0x1F900158 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x2B908000 },
            { .reg = 0x00009888, .val = 0x2D908000 },
            { .reg = 0x00009888, .val = 0x2F908000 },
            { .reg = 0x00009888, .val = 0x31908000 },
            { .reg = 0x00009888, .val = 0x15908000 },
            { .reg = 0x00009888, .val = 0x17908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1190001F },
            { .reg = 0x00009888, .val = 0x51904400 },
            { .reg = 0x00009888, .val = 0x41900020 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900C21 },
            { .reg = 0x00009888, .val = 0x47900061 },
            { .reg = 0x00009888, .val = 0x57904440 },
            { .reg = 0x00009888, .val = 0x49900000 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900000 },
            { .reg = 0x00009888, .val = 0x59900004 },
            { .reg = 0x00009888, .val = 0x43900000 },
            { .reg = 0x00009888, .val = 0x53904444 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "00b80b4c-d215-4378-9015-da3dda3b61ea";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if ((perf->sys_vars.slice_mask & 0x01) &&
          (perf->sys_vars.revision < 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x104F00E0 },
            { .reg = 0x00009888, .val = 0x124F1C00 },
            { .reg = 0x00009888, .val = 0x106C00E0 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901403 },
            { .reg = 0x00009888, .val = 0x184E8000 },
            { .reg = 0x00009888, .val = 0x1A4E8200 },
            { .reg = 0x00009888, .val = 0x044E8000 },
            { .reg = 0x00009888, .val = 0x004F0DB2 },
            { .reg = 0x00009888, .val = 0x064F0900 },
            { .reg = 0x00009888, .val = 0x084F1880 },
            { .reg = 0x00009888, .val = 0x0A4F0011 },
            { .reg = 0x00009888, .val = 0x0C4F0E3C },
            { .reg = 0x00009888, .val = 0x0E4F1D80 },
            { .reg = 0x00009888, .val = 0x086C0002 },
            { .reg = 0x00009888, .val = 0x0A6C0100 },
            { .reg = 0x00009888, .val = 0x0E6C000C },
            { .reg = 0x00009888, .val = 0x026C000B },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1A6C0000 },
            { .reg = 0x00009888, .val = 0x081B4000 },
            { .reg = 0x00009888, .val = 0x0A1B8000 },
            { .reg = 0x00009888, .val = 0x0E1B4000 },
            { .reg = 0x00009888, .val = 0x021B4000 },
            { .reg = 0x00009888, .val = 0x1A1C4000 },
            { .reg = 0x00009888, .val = 0x1C1C0012 },
            { .reg = 0x00009888, .val = 0x141C8000 },
            { .reg = 0x00009888, .val = 0x005BC000 },
            { .reg = 0x00009888, .val = 0x065B8000 },
            { .reg = 0x00009888, .val = 0x085B8000 },
            { .reg = 0x00009888, .val = 0x0A5B4000 },
            { .reg = 0x00009888, .val = 0x0C5BC000 },
            { .reg = 0x00009888, .val = 0x0E5B8000 },
            { .reg = 0x00009888, .val = 0x105C8000 },
            { .reg = 0x00009888, .val = 0x1A5CA000 },
            { .reg = 0x00009888, .val = 0x1C5C002D },
            { .reg = 0x00009888, .val = 0x125C8000 },
            { .reg = 0x00009888, .val = 0x0A4C0800 },
            { .reg = 0x00009888, .val = 0x0C4C0082 },
            { .reg = 0x00009888, .val = 0x084C8000 },
            { .reg = 0x00009888, .val = 0x000DA000 },
            { .reg = 0x00009888, .val = 0x060D8000 },
            { .reg = 0x00009888, .val = 0x080DA000 },
            { .reg = 0x00009888, .val = 0x0A0DA000 },
            { .reg = 0x00009888, .val = 0x0C0DA000 },
            { .reg = 0x00009888, .val = 0x0E0DA000 },
            { .reg = 0x00009888, .val = 0x020D2000 },
            { .reg = 0x00009888, .val = 0x0C0F5400 },
            { .reg = 0x00009888, .val = 0x0E0F5500 },
            { .reg = 0x00009888, .val = 0x100F0155 },
            { .reg = 0x00009888, .val = 0x002CC000 },
            { .reg = 0x00009888, .val = 0x0E2CC000 },
            { .reg = 0x00009888, .val = 0x162CBE00 },
            { .reg = 0x00009888, .val = 0x182C00EF },
            { .reg = 0x00009888, .val = 0x022CC000 },
            { .reg = 0x00009888, .val = 0x042C8000 },
            { .reg = 0x00009888, .val = 0x19900157 },
            { .reg = 0x00009888, .val = 0x1B900167 },
            { .reg = 0x00009888, .val = 0x1D900105 },
            { .reg = 0x00009888, .val = 0x1F900103 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x11900FFF },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900840 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900842 },
            { .reg = 0x00009888, .val = 0x47900840 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900840 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900040 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900840 },
            { .reg = 0x00009888, .val = 0x53901111 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if ((perf->sys_vars.slice_mask & 0x01) &&
          (perf->sys_vars.revision >= 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x104F00E0 },
            { .reg = 0x00009888, .val = 0x124F1C00 },
            { .reg = 0x00009888, .val = 0x106C00E0 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901403 },
            { .reg = 0x00009888, .val = 0x004E8000 },
            { .reg = 0x00009888, .val = 0x1A4E0820 },
            { .reg = 0x00009888, .val = 0x1C4E0002 },
            { .reg = 0x00009888, .val = 0x064F0900 },
            { .reg = 0x00009888, .val = 0x084F0032 },
            { .reg = 0x00009888, .val = 0x0A4F1810 },
            { .reg = 0x00009888, .val = 0x0C4F0E00 },
            { .reg = 0x00009888, .val = 0x0E4F003C },
            { .reg = 0x00009888, .val = 0x004F0D80 },
            { .reg = 0x00009888, .val = 0x024F003B },
            { .reg = 0x00009888, .val = 0x006C0002 },
            { .reg = 0x00009888, .val = 0x086C0000 },
            { .reg = 0x00009888, .val = 0x0C6C000C },
            { .reg = 0x00009888, .val = 0x0E6C0B00 },
            { .reg = 0x00009888, .val = 0x186C0000 },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1E6C0000 },
            { .reg = 0x00009888, .val = 0x001B4000 },
            { .reg = 0x00009888, .val = 0x081B8000 },
            { .reg = 0x00009888, .val = 0x0C1B4000 },
            { .reg = 0x00009888, .val = 0x0E1B8000 },
            { .reg = 0x00009888, .val = 0x101C8000 },
            { .reg = 0x00009888, .val = 0x1A1C8000 },
            { .reg = 0x00009888, .val = 0x1C1C0024 },
            { .reg = 0x00009888, .val = 0x065B8000 },
            { .reg = 0x00009888, .val = 0x085B4000 },
            { .reg = 0x00009888, .val = 0x0A5BC000 },
            { .reg = 0x00009888, .val = 0x0C5B8000 },
            { .reg = 0x00009888, .val = 0x0E5B4000 },
            { .reg = 0x00009888, .val = 0x005B8000 },
            { .reg = 0x00009888, .val = 0x025B4000 },
            { .reg = 0x00009888, .val = 0x1A5C6000 },
            { .reg = 0x00009888, .val = 0x1C5C001B },
            { .reg = 0x00009888, .val = 0x125C8000 },
            { .reg = 0x00009888, .val = 0x145C8000 },
            { .reg = 0x00009888, .val = 0x004C8000 },
            { .reg = 0x00009888, .val = 0x0A4C2000 },
            { .reg = 0x00009888, .val = 0x0C4C0208 },
            { .reg = 0x00009888, .val = 0x000DA000 },
            { .reg = 0x00009888, .val = 0x060D8000 },
            { .reg = 0x00009888, .val = 0x080DA000 },
            { .reg = 0x00009888, .val = 0x0A0DA000 },
            { .reg = 0x00009888, .val = 0x0C0DA000 },
            { .reg = 0x00009888, .val = 0x0E0DA000 },
            { .reg = 0x00009888, .val = 0x020D2000 },
            { .reg = 0x00009888, .val = 0x0C0F5400 },
            { .reg = 0x00009888, .val = 0x0E0F5500 },
            { .reg = 0x00009888, .val = 0x100F0155 },
            { .reg = 0x00009888, .val = 0x002C8000 },
            { .reg = 0x00009888, .val = 0x0E2CC000 },
            { .reg = 0x00009888, .val = 0x162CFB00 },
            { .reg = 0x00009888, .val = 0x182C00BE },
            { .reg = 0x00009888, .val = 0x022CC000 },
            { .reg = 0x00009888, .val = 0x042CC000 },
            { .reg = 0x00009888, .val = 0x19900157 },
            { .reg = 0x00009888, .val = 0x1B900167 },
            { .reg = 0x00009888, .val = 0x1D900105 },
            { .reg = 0x00009888, .val = 0x1F900103 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x11900FFF },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900800 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900842 },
            { .reg = 0x00009888, .val = 0x47900802 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900802 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900002 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900842 },
            { .reg = 0x00009888, .val = 0x53901111 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "2a0c0933-37e7-427c-9951-ded42a78bb27";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.revision < 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x0C0E001F },
            { .reg = 0x00009888, .val = 0x0A0F0000 },
            { .reg = 0x00009888, .val = 0x10116800 },
            { .reg = 0x00009888, .val = 0x178A03E0 },
            { .reg = 0x00009888, .val = 0x11824C00 },
            { .reg = 0x00009888, .val = 0x11830020 },
            { .reg = 0x00009888, .val = 0x13840020 },
            { .reg = 0x00009888, .val = 0x11850019 },
            { .reg = 0x00009888, .val = 0x11860007 },
            { .reg = 0x00009888, .val = 0x01870C40 },
            { .reg = 0x00009888, .val = 0x17880000 },
            { .reg = 0x00009888, .val = 0x022F4000 },
            { .reg = 0x00009888, .val = 0x0A4C0040 },
            { .reg = 0x00009888, .val = 0x0C0D8000 },
            { .reg = 0x00009888, .val = 0x040D4000 },
            { .reg = 0x00009888, .val = 0x060D2000 },
            { .reg = 0x00009888, .val = 0x020E5400 },
            { .reg = 0x00009888, .val = 0x000E0000 },
            { .reg = 0x00009888, .val = 0x080F0040 },
            { .reg = 0x00009888, .val = 0x000F0000 },
            { .reg = 0x00009888, .val = 0x100F0000 },
            { .reg = 0x00009888, .val = 0x0E0F0040 },
            { .reg = 0x00009888, .val = 0x0C2C8000 },
            { .reg = 0x00009888, .val = 0x06104000 },
            { .reg = 0x00009888, .val = 0x06110012 },
            { .reg = 0x00009888, .val = 0x06131000 },
            { .reg = 0x00009888, .val = 0x01898000 },
            { .reg = 0x00009888, .val = 0x0D890100 },
            { .reg = 0x00009888, .val = 0x03898000 },
            { .reg = 0x00009888, .val = 0x09808000 },
            { .reg = 0x00009888, .val = 0x0B808000 },
            { .reg = 0x00009888, .val = 0x0380C000 },
            { .reg = 0x00009888, .val = 0x0F8A0075 },
            { .reg = 0x00009888, .val = 0x1D8A0000 },
            { .reg = 0x00009888, .val = 0x118A8000 },
            { .reg = 0x00009888, .val = 0x1B8A4000 },
            { .reg = 0x00009888, .val = 0x138A8000 },
            { .reg = 0x00009888, .val = 0x1D81A000 },
            { .reg = 0x00009888, .val = 0x15818000 },
            { .reg = 0x00009888, .val = 0x17818000 },
            { .reg = 0x00009888, .val = 0x0B820030 },
            { .reg = 0x00009888, .val = 0x07828000 },
            { .reg = 0x00009888, .val = 0x0D824000 },
            { .reg = 0x00009888, .val = 0x0F828000 },
            { .reg = 0x00009888, .val = 0x05824000 },
            { .reg = 0x00009888, .val = 0x0D830003 },
            { .reg = 0x00009888, .val = 0x0583000C },
            { .reg = 0x00009888, .val = 0x09830000 },
            { .reg = 0x00009888, .val = 0x03838000 },
            { .reg = 0x00009888, .val = 0x07838000 },
            { .reg = 0x00009888, .val = 0x0B840980 },
            { .reg = 0x00009888, .val = 0x03844D80 },
            { .reg = 0x00009888, .val = 0x11840000 },
            { .reg = 0x00009888, .val = 0x09848000 },
            { .reg = 0x00009888, .val = 0x09850080 },
            { .reg = 0x00009888, .val = 0x03850003 },
            { .reg = 0x00009888, .val = 0x01850000 },
            { .reg = 0x00009888, .val = 0x07860000 },
            { .reg = 0x00009888, .val = 0x0F860400 },
            { .reg = 0x00009888, .val = 0x09870032 },
            { .reg = 0x00009888, .val = 0x01888052 },
            { .reg = 0x00009888, .val = 0x11880000 },
            { .reg = 0x00009888, .val = 0x09884000 },
            { .reg = 0x00009888, .val = 0x15968000 },
            { .reg = 0x00009888, .val = 0x17968000 },
            { .reg = 0x00009888, .val = 0x0F96C000 },
            { .reg = 0x00009888, .val = 0x1F950011 },
            { .reg = 0x00009888, .val = 0x1D950014 },
            { .reg = 0x00009888, .val = 0x0592C000 },
            { .reg = 0x00009888, .val = 0x0B928000 },
            { .reg = 0x00009888, .val = 0x0D924000 },
            { .reg = 0x00009888, .val = 0x0F924000 },
            { .reg = 0x00009888, .val = 0x11928000 },
            { .reg = 0x00009888, .val = 0x1392C000 },
            { .reg = 0x00009888, .val = 0x09924000 },
            { .reg = 0x00009888, .val = 0x01985000 },
            { .reg = 0x00009888, .val = 0x07988000 },
            { .reg = 0x00009888, .val = 0x09981000 },
            { .reg = 0x00009888, .val = 0x0B982000 },
            { .reg = 0x00009888, .val = 0x0D982000 },
            { .reg = 0x00009888, .val = 0x0F989000 },
            { .reg = 0x00009888, .val = 0x05982000 },
            { .reg = 0x00009888, .val = 0x13904000 },
            { .reg = 0x00009888, .val = 0x21904000 },
            { .reg = 0x00009888, .val = 0x23904000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27904000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B904000 },
            { .reg = 0x00009888, .val = 0x2F904000 },
            { .reg = 0x00009888, .val = 0x31904000 },
            { .reg = 0x00009888, .val = 0x15904000 },
            { .reg = 0x00009888, .val = 0x17908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B904000 },
            { .reg = 0x00009888, .val = 0x0B978000 },
            { .reg = 0x00009888, .val = 0x0F974000 },
            { .reg = 0x00009888, .val = 0x11974000 },
            { .reg = 0x00009888, .val = 0x13978000 },
            { .reg = 0x00009888, .val = 0x09974000 },
            { .reg = 0x00009888, .val = 0x1190C080 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x419010A0 },
            { .reg = 0x00009888, .val = 0x55904000 },
            { .reg = 0x00009888, .val = 0x45901000 },
            { .reg = 0x00009888, .val = 0x47900084 },
            { .reg = 0x00009888, .val = 0x57904400 },
            { .reg = 0x00009888, .val = 0x499000A5 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900081 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x439014A4 },
            { .reg = 0x00009888, .val = 0x53900400 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.revision >= 0x02) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x0C0E001F },
            { .reg = 0x00009888, .val = 0x0A0F0000 },
            { .reg = 0x00009888, .val = 0x10116800 },
            { .reg = 0x00009888, .val = 0x178A03E0 },
            { .reg = 0x00009888, .val = 0x11824C00 },
            { .reg = 0x00009888, .val = 0x11830020 },
            { .reg = 0x00009888, .val = 0x13840020 },
            { .reg = 0x00009888, .val = 0x11850019 },
            { .reg = 0x00009888, .val = 0x11860007 },
            { .reg = 0x00009888, .val = 0x01870C40 },
            { .reg = 0x00009888, .val = 0x17880000 },
            { .reg = 0x00009888, .val = 0x022F4000 },
            { .reg = 0x00009888, .val = 0x0A4C0040 },
            { .reg = 0x00009888, .val = 0x0C0D8000 },
            { .reg = 0x00009888, .val = 0x040D4000 },
            { .reg = 0x00009888, .val = 0x060D2000 },
            { .reg = 0x00009888, .val = 0x020E5400 },
            { .reg = 0x00009888, .val = 0x000E0000 },
            { .reg = 0x00009888, .val = 0x080F0040 },
            { .reg = 0x00009888, .val = 0x000F0000 },
            { .reg = 0x00009888, .val = 0x100F0000 },
            { .reg = 0x00009888, .val = 0x0E0F0040 },
            { .reg = 0x00009888, .val = 0x0C2C8000 },
            { .reg = 0x00009888, .val = 0x06104000 },
            { .reg = 0x00009888, .val = 0x06110012 },
            { .reg = 0x00009888, .val = 0x06131000 },
            { .reg = 0x00009888, .val = 0x01898000 },
            { .reg = 0x00009888, .val = 0x0D890100 },
            { .reg = 0x00009888, .val = 0x03898000 },
            { .reg = 0x00009888, .val = 0x09808000 },
            { .reg = 0x00009888, .val = 0x0B808000 },
            { .reg = 0x00009888, .val = 0x0380C000 },
            { .reg = 0x00009888, .val = 0x0F8A0075 },
            { .reg = 0x00009888, .val = 0x1D8A0000 },
            { .reg = 0x00009888, .val = 0x118A8000 },
            { .reg = 0x00009888, .val = 0x1B8A4000 },
            { .reg = 0x00009888, .val = 0x138A8000 },
            { .reg = 0x00009888, .val = 0x1D81A000 },
            { .reg = 0x00009888, .val = 0x15818000 },
            { .reg = 0x00009888, .val = 0x17818000 },
            { .reg = 0x00009888, .val = 0x0B820030 },
            { .reg = 0x00009888, .val = 0x07828000 },
            { .reg = 0x00009888, .val = 0x0D824000 },
            { .reg = 0x00009888, .val = 0x0F828000 },
            { .reg = 0x00009888, .val = 0x05824000 },
            { .reg = 0x00009888, .val = 0x0D830003 },
            { .reg = 0x00009888, .val = 0x0583000C },
            { .reg = 0x00009888, .val = 0x09830000 },
            { .reg = 0x00009888, .val = 0x03838000 },
            { .reg = 0x00009888, .val = 0x07838000 },
            { .reg = 0x00009888, .val = 0x0B840980 },
            { .reg = 0x00009888, .val = 0x03844D80 },
            { .reg = 0x00009888, .val = 0x11840000 },
            { .reg = 0x00009888, .val = 0x09848000 },
            { .reg = 0x00009888, .val = 0x09850080 },
            { .reg = 0x00009888, .val = 0x03850003 },
            { .reg = 0x00009888, .val = 0x01850000 },
            { .reg = 0x00009888, .val = 0x07860000 },
            { .reg = 0x00009888, .val = 0x0F860400 },
            { .reg = 0x00009888, .val = 0x09870032 },
            { .reg = 0x00009888, .val = 0x01888052 },
            { .reg = 0x00009888, .val = 0x11880000 },
            { .reg = 0x00009888, .val = 0x09884000 },
            { .reg = 0x00009888, .val = 0x1B931001 },
            { .reg = 0x00009888, .val = 0x1D930001 },
            { .reg = 0x00009888, .val = 0x19934000 },
            { .reg = 0x00009888, .val = 0x1B958000 },
            { .reg = 0x00009888, .val = 0x1D950094 },
            { .reg = 0x00009888, .val = 0x19958000 },
            { .reg = 0x00009888, .val = 0x05E5A000 },
            { .reg = 0x00009888, .val = 0x01E5C000 },
            { .reg = 0x00009888, .val = 0x0592C000 },
            { .reg = 0x00009888, .val = 0x0B928000 },
            { .reg = 0x00009888, .val = 0x0D924000 },
            { .reg = 0x00009888, .val = 0x0F924000 },
            { .reg = 0x00009888, .val = 0x11928000 },
            { .reg = 0x00009888, .val = 0x1392C000 },
            { .reg = 0x00009888, .val = 0x09924000 },
            { .reg = 0x00009888, .val = 0x01985000 },
            { .reg = 0x00009888, .val = 0x07988000 },
            { .reg = 0x00009888, .val = 0x09981000 },
            { .reg = 0x00009888, .val = 0x0B982000 },
            { .reg = 0x00009888, .val = 0x0D982000 },
            { .reg = 0x00009888, .val = 0x0F989000 },
            { .reg = 0x00009888, .val = 0x05982000 },
            { .reg = 0x00009888, .val = 0x13904000 },
            { .reg = 0x00009888, .val = 0x21904000 },
            { .reg = 0x00009888, .val = 0x23904000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27904000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B904000 },
            { .reg = 0x00009888, .val = 0x2F904000 },
            { .reg = 0x00009888, .val = 0x31904000 },
            { .reg = 0x00009888, .val = 0x15904000 },
            { .reg = 0x00009888, .val = 0x17908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B904000 },
            { .reg = 0x00009888, .val = 0x1190C080 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x419010A0 },
            { .reg = 0x00009888, .val = 0x55904000 },
            { .reg = 0x00009888, .val = 0x45901000 },
            { .reg = 0x00009888, .val = 0x47900084 },
            { .reg = 0x00009888, .val = 0x57904400 },
            { .reg = 0x00009888, .val = 0x499000A5 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900081 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x439014A4 },
            { .reg = 0x00009888, .val = 0x53900400 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "246b35f1-44e0-4d03-8936-e452e291d064";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

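   /* Note: the query is freshly allocated above with rzalloc() and nothing
    * has assigned data_size yet, so the check below guards the one-time
    * setup of this query. We're assuming there can't be any variation in
    * the definition of a query between contexts, so it's ok to describe
    * its register programming in static const tables shared by all users. */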

   if (!query->data_size) {
      if ((perf->sys_vars.slice_mask & 0x01) &&
          (perf->sys_vars.revision < 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x13946000 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F900003 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x0F968000 },
            { .reg = 0x00009888, .val = 0x1196C000 },
            { .reg = 0x00009888, .val = 0x13964000 },
            { .reg = 0x00009888, .val = 0x11938000 },
            { .reg = 0x00009888, .val = 0x1B93FE00 },
            { .reg = 0x00009888, .val = 0x01940010 },
            { .reg = 0x00009888, .val = 0x07941100 },
            { .reg = 0x00009888, .val = 0x09941312 },
            { .reg = 0x00009888, .val = 0x0B941514 },
            { .reg = 0x00009888, .val = 0x0D941716 },
            { .reg = 0x00009888, .val = 0x11940000 },
            { .reg = 0x00009888, .val = 0x19940000 },
            { .reg = 0x00009888, .val = 0x1B940000 },
            { .reg = 0x00009888, .val = 0x1D940000 },
            { .reg = 0x00009888, .val = 0x1B954000 },
            { .reg = 0x00009888, .val = 0x1D95A550 },
            { .reg = 0x00009888, .val = 0x1F9502AA },
            { .reg = 0x00009888, .val = 0x2F900157 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x13908000 },
            { .reg = 0x00009888, .val = 0x21908000 },
            { .reg = 0x00009888, .val = 0x23908000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27908000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B908000 },
            { .reg = 0x00009888, .val = 0x2D908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C00 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900000 },
            { .reg = 0x00009888, .val = 0x47900000 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if ((perf->sys_vars.revision < 0x05) &&
          (perf->sys_vars.revision >= 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x13946000 },
            { .reg = 0x00009888, .val = 0x15940016 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x19930800 },
            { .reg = 0x00009888, .val = 0x1B93AA55 },
            { .reg = 0x00009888, .val = 0x1D9300AA },
            { .reg = 0x00009888, .val = 0x01940010 },
            { .reg = 0x00009888, .val = 0x07941100 },
            { .reg = 0x00009888, .val = 0x09941312 },
            { .reg = 0x00009888, .val = 0x0B941514 },
            { .reg = 0x00009888, .val = 0x0D941716 },
            { .reg = 0x00009888, .val = 0x0F940018 },
            { .reg = 0x00009888, .val = 0x1B940000 },
            { .reg = 0x00009888, .val = 0x11940000 },
            { .reg = 0x00009888, .val = 0x01E58000 },
            { .reg = 0x00009888, .val = 0x03E57000 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x13908000 },
            { .reg = 0x00009888, .val = 0x21908000 },
            { .reg = 0x00009888, .val = 0x23908000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27908000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B908000 },
            { .reg = 0x00009888, .val = 0x2D908000 },
            { .reg = 0x00009888, .val = 0x2F908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C20 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900400 },
            { .reg = 0x00009888, .val = 0x47900421 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900421 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900061 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.revision >= 0x05) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F900064 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x1B930055 },
            { .reg = 0x00009888, .val = 0x03E58000 },
            { .reg = 0x00009888, .val = 0x05E5C000 },
            { .reg = 0x00009888, .val = 0x07E54000 },
            { .reg = 0x00009888, .val = 0x13900150 },
            { .reg = 0x00009888, .val = 0x21900151 },
            { .reg = 0x00009888, .val = 0x23900152 },
            { .reg = 0x00009888, .val = 0x25900153 },
            { .reg = 0x00009888, .val = 0x27900154 },
            { .reg = 0x00009888, .val = 0x29900155 },
            { .reg = 0x00009888, .val = 0x2B900156 },
            { .reg = 0x00009888, .val = 0x2D900157 },
            { .reg = 0x00009888, .val = 0x2F90015F },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C60 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900C00 },
            { .reg = 0x00009888, .val = 0x47900C63 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900C63 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "e0d3da02-00bf-4a96-9795-b48158c73a68";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if ((perf->sys_vars.slice_mask & 0x01) &&
          (perf->sys_vars.revision < 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x13945400 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901400 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x0F968000 },
            { .reg = 0x00009888, .val = 0x1196C000 },
            { .reg = 0x00009888, .val = 0x13964000 },
            { .reg = 0x00009888, .val = 0x11938000 },
            { .reg = 0x00009888, .val = 0x1B93FE00 },
            { .reg = 0x00009888, .val = 0x01940010 },
            { .reg = 0x00009888, .val = 0x07941100 },
            { .reg = 0x00009888, .val = 0x09941312 },
            { .reg = 0x00009888, .val = 0x0B941514 },
            { .reg = 0x00009888, .val = 0x0D941716 },
            { .reg = 0x00009888, .val = 0x11940000 },
            { .reg = 0x00009888, .val = 0x19940000 },
            { .reg = 0x00009888, .val = 0x1B940000 },
            { .reg = 0x00009888, .val = 0x1D940000 },
            { .reg = 0x00009888, .val = 0x1B954000 },
            { .reg = 0x00009888, .val = 0x1D95A550 },
            { .reg = 0x00009888, .val = 0x1F9502AA },
            { .reg = 0x00009888, .val = 0x2F900167 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x13908000 },
            { .reg = 0x00009888, .val = 0x21908000 },
            { .reg = 0x00009888, .val = 0x23908000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27908000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B908000 },
            { .reg = 0x00009888, .val = 0x2D908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C00 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900000 },
            { .reg = 0x00009888, .val = 0x47900000 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901000 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x1B930055 },
            { .reg = 0x00009888, .val = 0x03E58000 },
            { .reg = 0x00009888, .val = 0x05E5C000 },
            { .reg = 0x00009888, .val = 0x07E54000 },
            { .reg = 0x00009888, .val = 0x13900160 },
            { .reg = 0x00009888, .val = 0x21900161 },
            { .reg = 0x00009888, .val = 0x23900162 },
            { .reg = 0x00009888, .val = 0x25900163 },
            { .reg = 0x00009888, .val = 0x27900164 },
            { .reg = 0x00009888, .val = 0x29900165 },
            { .reg = 0x00009888, .val = 0x2B900166 },
            { .reg = 0x00009888, .val = 0x2D900167 },
            { .reg = 0x00009888, .val = 0x2F900150 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C60 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900C00 },
            { .reg = 0x00009888, .val = 0x47900C63 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900C63 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if ((perf->sys_vars.revision < 0x05) &&
          (perf->sys_vars.revision >= 0x02)) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x13945400 },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901400 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x19930800 },
            { .reg = 0x00009888, .val = 0x1B93AA55 },
            { .reg = 0x00009888, .val = 0x1D93002A },
            { .reg = 0x00009888, .val = 0x01940010 },
            { .reg = 0x00009888, .val = 0x07941100 },
            { .reg = 0x00009888, .val = 0x09941312 },
            { .reg = 0x00009888, .val = 0x0B941514 },
            { .reg = 0x00009888, .val = 0x0D941716 },
            { .reg = 0x00009888, .val = 0x1B940000 },
            { .reg = 0x00009888, .val = 0x11940000 },
            { .reg = 0x00009888, .val = 0x01E58000 },
            { .reg = 0x00009888, .val = 0x03E57000 },
            { .reg = 0x00009888, .val = 0x2F900167 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x13908000 },
            { .reg = 0x00009888, .val = 0x21908000 },
            { .reg = 0x00009888, .val = 0x23908000 },
            { .reg = 0x00009888, .val = 0x25908000 },
            { .reg = 0x00009888, .val = 0x27908000 },
            { .reg = 0x00009888, .val = 0x29908000 },
            { .reg = 0x00009888, .val = 0x2B908000 },
            { .reg = 0x00009888, .val = 0x2D908000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C20 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900400 },
            { .reg = 0x00009888, .val = 0x47900421 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900421 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x11810C00 },
            { .reg = 0x00009888, .val = 0x1381001A },
            { .reg = 0x00009888, .val = 0x37906800 },
            { .reg = 0x00009888, .val = 0x3F901000 },
            { .reg = 0x00009888, .val = 0x03811300 },
            { .reg = 0x00009888, .val = 0x05811B12 },
            { .reg = 0x00009888, .val = 0x0781001A },
            { .reg = 0x00009888, .val = 0x1F810000 },
            { .reg = 0x00009888, .val = 0x17810000 },
            { .reg = 0x00009888, .val = 0x19810000 },
            { .reg = 0x00009888, .val = 0x1B810000 },
            { .reg = 0x00009888, .val = 0x1D810000 },
            { .reg = 0x00009888, .val = 0x1B930055 },
            { .reg = 0x00009888, .val = 0x03E58000 },
            { .reg = 0x00009888, .val = 0x05E5C000 },
            { .reg = 0x00009888, .val = 0x07E54000 },
            { .reg = 0x00009888, .val = 0x13900160 },
            { .reg = 0x00009888, .val = 0x21900161 },
            { .reg = 0x00009888, .val = 0x23900162 },
            { .reg = 0x00009888, .val = 0x25900163 },
            { .reg = 0x00009888, .val = 0x27900164 },
            { .reg = 0x00009888, .val = 0x29900165 },
            { .reg = 0x00009888, .val = 0x2B900166 },
            { .reg = 0x00009888, .val = 0x2D900167 },
            { .reg = 0x00009888, .val = 0x2F900150 },
            { .reg = 0x00009888, .val = 0x31900105 },
            { .reg = 0x00009888, .val = 0x15900103 },
            { .reg = 0x00009888, .val = 0x17900101 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x19908000 },
            { .reg = 0x00009888, .val = 0x1B908000 },
            { .reg = 0x00009888, .val = 0x1D908000 },
            { .reg = 0x00009888, .val = 0x1F908000 },
            { .reg = 0x00009888, .val = 0x11900000 },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900C60 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900C00 },
            { .reg = 0x00009888, .val = 0x47900C63 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900C63 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900063 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900003 },
            { .reg = 0x00009888, .val = 0x53900000 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended metrics set";
   query->symbol_name = "ComputeExtended";
   query->guid = "c26b1fda-2752-4a33-a448-4c8718366846";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.subslice_mask & 0x01) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x000000A0 },
            { .reg = 0x00009888, .val = 0x106C00E0 },
            { .reg = 0x00009888, .val = 0x141C8160 },
            { .reg = 0x00009888, .val = 0x161C8015 },
            { .reg = 0x00009888, .val = 0x181C0120 },
            { .reg = 0x00009888, .val = 0x004E8000 },
            { .reg = 0x00009888, .val = 0x0E4E8000 },
            { .reg = 0x00009888, .val = 0x184E8000 },
            { .reg = 0x00009888, .val = 0x1A4EAAA0 },
            { .reg = 0x00009888, .val = 0x1C4E0002 },
            { .reg = 0x00009888, .val = 0x024E8000 },
            { .reg = 0x00009888, .val = 0x044E8000 },
            { .reg = 0x00009888, .val = 0x064E8000 },
            { .reg = 0x00009888, .val = 0x084E8000 },
            { .reg = 0x00009888, .val = 0x0A4E8000 },
            { .reg = 0x00009888, .val = 0x0E6C0B01 },
            { .reg = 0x00009888, .val = 0x006C0200 },
            { .reg = 0x00009888, .val = 0x026C000C },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1E6C0000 },
            { .reg = 0x00009888, .val = 0x1A6C0000 },
            { .reg = 0x00009888, .val = 0x0E1BC000 },
            { .reg = 0x00009888, .val = 0x001B8000 },
            { .reg = 0x00009888, .val = 0x021BC000 },
            { .reg = 0x00009888, .val = 0x001C0041 },
            { .reg = 0x00009888, .val = 0x061C4200 },
            { .reg = 0x00009888, .val = 0x081C4443 },
            { .reg = 0x00009888, .val = 0x0A1C4645 },
            { .reg = 0x00009888, .val = 0x0C1C7647 },
            { .reg = 0x00009888, .val = 0x041C7357 },
            { .reg = 0x00009888, .val = 0x1C1C0030 },
            { .reg = 0x00009888, .val = 0x101C0000 },
            { .reg = 0x00009888, .val = 0x1A1C0000 },
            { .reg = 0x00009888, .val = 0x121C8000 },
            { .reg = 0x00009888, .val = 0x004C8000 },
            { .reg = 0x00009888, .val = 0x0A4CAA2A },
            { .reg = 0x00009888, .val = 0x0C4C02AA },
            { .reg = 0x00009888, .val = 0x084CA000 },
            { .reg = 0x00009888, .val = 0x000DA000 },
            { .reg = 0x00009888, .val = 0x060D8000 },
            { .reg = 0x00009888, .val = 0x080DA000 },
            { .reg = 0x00009888, .val = 0x0A0DA000 },
            { .reg = 0x00009888, .val = 0x0C0DA000 },
            { .reg = 0x00009888, .val = 0x0E0DA000 },
            { .reg = 0x00009888, .val = 0x020DA000 },
            { .reg = 0x00009888, .val = 0x040DA000 },
            { .reg = 0x00009888, .val = 0x0C0F5400 },
            { .reg = 0x00009888, .val = 0x0E0F5515 },
            { .reg = 0x00009888, .val = 0x100F0155 },
            { .reg = 0x00009888, .val = 0x002C8000 },
            { .reg = 0x00009888, .val = 0x0E2C8000 },
            { .reg = 0x00009888, .val = 0x162CAA00 },
            { .reg = 0x00009888, .val = 0x182C00AA },
            { .reg = 0x00009888, .val = 0x022C8000 },
            { .reg = 0x00009888, .val = 0x042C8000 },
            { .reg = 0x00009888, .val = 0x062C8000 },
            { .reg = 0x00009888, .val = 0x082C8000 },
            { .reg = 0x00009888, .val = 0x0A2C8000 },
            { .reg = 0x00009888, .val = 0x11907FFF },
            { .reg = 0x00009888, .val = 0x51900000 },
            { .reg = 0x00009888, .val = 0x41900040 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x45900802 },
            { .reg = 0x00009888, .val = 0x47900842 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900842 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4B900000 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x43900800 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009840, .val = 0x00000080 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache metrics set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "9fb22842-e708-43f7-9752-e0e41670c39e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

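   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query once and
    * only initialize it a single time. The query itself lives in the
    * allocation made at the top of this function (not in a global
    * variable); the data_size check below guards that initialization. */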

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F901403 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900167 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900042 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53901111 },
         { .reg = 0x00009888, .val = 0x43900420 },
         { .reg = 0x00009840, .val = 0x00000080 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "HDCAndSF" OA metric set (sklgt2 variant).
 *
 * Allocates a query descriptor with rzalloc() using `perf` as the ralloc
 * context, fills in its identity (name, symbol name, GUID), OA report
 * format, accumulator offsets, the mux / B-counter / flex register
 * programming tables and the per-counter descriptors, and finally inserts
 * the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * 36 counters are always registered; up to 4 more are added depending on
 * perf->sys_vars.subslice_mask and perf->sys_vars.query_mode, which matches
 * the 40 counter slots allocated below.
 *
 * \param perf  perf configuration that owns the new query and whose
 *              oa_metrics_table receives it.
 */
static void
sklgt2_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "b4aa5e13-2d25-418f-827d-421f5cc4c43b";
   /* Room for every counter below, including all conditional ones. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is an index into the results accumulator array used by the
    * counter read callbacks; the groups are laid out back to back
    * (1 GPU time, 1 GPU clock, 36 A, 8 B, 8 C, 2 perfcnt entries, then
    * rpstat). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was allocated by rzalloc() just above, so
    * data_size is presumably still zero here and this branch is expected to
    * always run — confirm against ralloc's zero-fill guarantee. */
   if (!query->data_size) {
      /* Mux register programming table for this metric set (static, shared
       * by reference with the query config). */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B9000A0 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming table. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming table. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Each one claims the next slot in
       * query->counters (bumping n_counters) and records its read callback,
       * user-facing strings, type/units metadata and a fixed byte offset.
       * In the values below, uint64 counters advance the offset by 8 bytes
       * and float counters by 4. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time by a helper rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Float counter above is followed by a uint64 one: the offset jumps
       * from 24 to 32 rather than 28. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Run of float percentage counters (offsets 80..116, 4 bytes apart). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Run of uint64 event/throughput counters (offsets 120..232). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* Last unconditional counter; it is the one used for the data_size
       * computation below when none of the conditional counters apply. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      /* Conditional counters: each is registered only when the matching
       * bit of perf->sys_vars.subslice_mask is set. Their byte offsets are
       * fixed literals, so a skipped counter leaves its offset unused
       * instead of shifting the following ones. */
      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      /* Registered only when perf->sys_vars.query_mode is non-zero; the
       * leading `true &&` is a no-op. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      /* `counter` points at the last counter actually registered, so the
       * result size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the sklgt2 "L3_1" OA metric set with perf.
 *
 * A query description is allocated with rzalloc() using perf as the
 * context, filled with the register programming (MUX, B-counter and flex
 * tables) and with one intel_perf_query_counter entry per exposed counter,
 * and finally inserted into perf->oa_metrics_table keyed by the query GUID.
 *
 * Counters gated on the system configuration:
 *   - the four "Slice0 L3 Bank*" counters need bit 0 of
 *     perf->sys_vars.slice_mask,
 *   - "SQ is full" needs perf->sys_vars.query_mode to be non-zero.
 *
 * At most 40 counters are appended, which is exactly the capacity of the
 * counter array allocated below.
 */
static void
sklgt2_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are immutable and shared by every
    * query object created by this function. */
   static const struct intel_perf_query_register_prog l3_1_mux_regs[] = {
      { .reg = 0x00009840, .val = 0x000000A0 },
      { .reg = 0x00009888, .val = 0x126C7B40 },
      { .reg = 0x00009888, .val = 0x166C0020 },
      { .reg = 0x00009888, .val = 0x0A603444 },
      { .reg = 0x00009888, .val = 0x0A613400 },
      { .reg = 0x00009888, .val = 0x1A4EA800 },
      { .reg = 0x00009888, .val = 0x1C4E0002 },
      { .reg = 0x00009888, .val = 0x024E8000 },
      { .reg = 0x00009888, .val = 0x044E8000 },
      { .reg = 0x00009888, .val = 0x064E8000 },
      { .reg = 0x00009888, .val = 0x084E8000 },
      { .reg = 0x00009888, .val = 0x0A4E8000 },
      { .reg = 0x00009888, .val = 0x064F4000 },
      { .reg = 0x00009888, .val = 0x0C6C5327 },
      { .reg = 0x00009888, .val = 0x0E6C5425 },
      { .reg = 0x00009888, .val = 0x006C2A00 },
      { .reg = 0x00009888, .val = 0x026C285B },
      { .reg = 0x00009888, .val = 0x046C005C },
      { .reg = 0x00009888, .val = 0x106C0000 },
      { .reg = 0x00009888, .val = 0x1C6C0000 },
      { .reg = 0x00009888, .val = 0x1E6C0000 },
      { .reg = 0x00009888, .val = 0x1A6C0800 },
      { .reg = 0x00009888, .val = 0x0C1BC000 },
      { .reg = 0x00009888, .val = 0x0E1BC000 },
      { .reg = 0x00009888, .val = 0x001B8000 },
      { .reg = 0x00009888, .val = 0x021BC000 },
      { .reg = 0x00009888, .val = 0x041BC000 },
      { .reg = 0x00009888, .val = 0x1C1C003C },
      { .reg = 0x00009888, .val = 0x121C8000 },
      { .reg = 0x00009888, .val = 0x141C8000 },
      { .reg = 0x00009888, .val = 0x161C8000 },
      { .reg = 0x00009888, .val = 0x181C8000 },
      { .reg = 0x00009888, .val = 0x1A1C0800 },
      { .reg = 0x00009888, .val = 0x065B4000 },
      { .reg = 0x00009888, .val = 0x1A5C1000 },
      { .reg = 0x00009888, .val = 0x10600000 },
      { .reg = 0x00009888, .val = 0x04600000 },
      { .reg = 0x00009888, .val = 0x0C610044 },
      { .reg = 0x00009888, .val = 0x10610000 },
      { .reg = 0x00009888, .val = 0x06610000 },
      { .reg = 0x00009888, .val = 0x0C4C02A8 },
      { .reg = 0x00009888, .val = 0x084CA000 },
      { .reg = 0x00009888, .val = 0x0A4C002A },
      { .reg = 0x00009888, .val = 0x0C0DA000 },
      { .reg = 0x00009888, .val = 0x0E0DA000 },
      { .reg = 0x00009888, .val = 0x000D8000 },
      { .reg = 0x00009888, .val = 0x020DA000 },
      { .reg = 0x00009888, .val = 0x040DA000 },
      { .reg = 0x00009888, .val = 0x060D2000 },
      { .reg = 0x00009888, .val = 0x100F0154 },
      { .reg = 0x00009888, .val = 0x0C0F5000 },
      { .reg = 0x00009888, .val = 0x0E0F0055 },
      { .reg = 0x00009888, .val = 0x182C00AA },
      { .reg = 0x00009888, .val = 0x022C8000 },
      { .reg = 0x00009888, .val = 0x042C8000 },
      { .reg = 0x00009888, .val = 0x062C8000 },
      { .reg = 0x00009888, .val = 0x082C8000 },
      { .reg = 0x00009888, .val = 0x0A2C8000 },
      { .reg = 0x00009888, .val = 0x0C2CC000 },
      { .reg = 0x00009888, .val = 0x1190FFC0 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900420 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
      { .reg = 0x00009888, .val = 0x4B900021 },
      { .reg = 0x00009888, .val = 0x59900000 },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41900400 },
      { .reg = 0x00009888, .val = 0x43900421 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x45900040 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x000091BC, .val = 0xE0500000 },
   };

   static const struct intel_perf_query_register_prog l3_1_b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002770, .val = 0x00100070 },
      { .reg = 0x00002774, .val = 0x0000FFF1 },
      { .reg = 0x00002778, .val = 0x00014002 },
      { .reg = 0x0000277C, .val = 0x0000C3FF },
      { .reg = 0x00002780, .val = 0x00010002 },
      { .reg = 0x00002784, .val = 0x0000C7FF },
      { .reg = 0x00002788, .val = 0x00004002 },
      { .reg = 0x0000278C, .val = 0x0000D3FF },
      { .reg = 0x00002790, .val = 0x00100700 },
      { .reg = 0x00002794, .val = 0x0000FF1F },
      { .reg = 0x00002798, .val = 0x00001402 },
      { .reg = 0x0000279C, .val = 0x0000FC3F },
      { .reg = 0x000027A0, .val = 0x00001002 },
      { .reg = 0x000027A4, .val = 0x0000FC7F },
      { .reg = 0x000027A8, .val = 0x00000402 },
      { .reg = 0x000027AC, .val = 0x0000FD3F },
   };

   static const struct intel_perf_query_register_prog l3_1_flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "504000e8-64f2-4aac-8301-51b81aa60d45";

   /* Room for every counter this set can expose; n_counters tracks how many
    * of the slots have actually been filled in. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each section starts right after the
    * previous one (time, clock, 36 A slots, 8 B slots, 8 C slots, 2 perfcnt
    * slots, then rpstat). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Always refers to the most recently appended counter; the final one is
    * what determines data_size below. */
   struct intel_perf_query_counter *ctr = query->counters;

   /* NOTE(review): the query was just obtained from rzalloc(), so data_size
    * is presumably still zero here and this branch is expected to run on
    * every call - confirm against ralloc's zero-fill guarantee. */
   if (query->data_size == 0) {
      query->config.mux_regs = l3_1_mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(l3_1_mux_regs);

      query->config.b_counter_regs = l3_1_b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(l3_1_b_counter_regs);

      query->config.flex_regs = l3_1_flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(l3_1_flex_regs);

      /* ---- GPU ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__gpu_time__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__gpu_core_clocks__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = sklgt2__l3_1__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__avg_gpu_core_frequency__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = sklgt2__l3_1__gpu_busy__read;

      /* ---- Thread dispatch, one counter per shader stage ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__vs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__hs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__ds_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__gs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__ps_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__cs_threads__read;

      /* ---- EU array utilisation (float percentages, 4 bytes apart) ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = sklgt2__l3_1__eu_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = sklgt2__l3_1__eu_stall__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "EuFpuBothActive";
      ctr->name = "EU Both FPU Pipes Active";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = sklgt2__l3_1__eu_fpu_both_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 92;
      ctr->oa_counter_read_float = sklgt2__l3_1__vs_fpu0_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 96;
      ctr->oa_counter_read_float = sklgt2__l3_1__vs_fpu1_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "VsSendActive";
      ctr->name = "VS Send Pipe Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 100;
      ctr->oa_counter_read_float = sklgt2__l3_1__vs_send_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 104;
      ctr->oa_counter_read_float = sklgt2__l3_1__ps_fpu0_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 108;
      ctr->oa_counter_read_float = sklgt2__l3_1__ps_fpu1_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PsSendActive";
      ctr->name = "PS Send Pipeline Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 112;
      ctr->oa_counter_read_float = sklgt2__l3_1__ps_send_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PsEuBothFpuActive";
      ctr->name = "FS Both FPU Active";
      ctr->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 116;
      ctr->oa_counter_read_float = sklgt2__l3_1__ps_eu_both_fpu_active__read;

      /* ---- 3D pipe: pixel / sample event counts ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "RasterizedPixels";
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__rasterized_pixels__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "HiDepthTestFails";
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__hi_depth_test_fails__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 136;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__early_depth_test_fails__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 144;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__samples_killed_in_ps__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 152;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__pixels_failing_post_ps_tests__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SamplesWritten";
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 160;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__samples_written__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SamplesBlended";
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 168;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__samples_blended__read;

      /* ---- Sampler ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SamplerTexels";
      ctr->name = "Sampler Texels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->category = "Sampler/Sampler Input";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 176;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__sampler_texels__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->name = "Sampler Texels Misses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->category = "Sampler/Sampler Cache";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 184;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__sampler_texel_misses__read;

      /* ---- L3 / data port ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SlmBytesRead";
      ctr->name = "SLM Bytes Read";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 192;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__slm_bytes_read__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "SlmBytesWritten";
      ctr->name = "SLM Bytes Written";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 200;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__slm_bytes_written__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->name = "Shader Memory Accesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 208;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__shader_memory_accesses__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "ShaderAtomics";
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->category = "L3/Data Port/Atomics";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 216;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__shader_atomics__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->name = "L3 Shader Throughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 224;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__l3_shader_throughput__read;

      ctr = &query->counters[query->n_counters++];
      ctr->symbol_name = "ShaderBarriers";
      ctr->name = "Shader Barrier Messages";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->category = "EU Array/Barrier";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 232;
      ctr->oa_counter_read_uint64 = sklgt2__l3_1__shader_barriers__read;

      /* ---- Slice 0 L3 bank counters: exposed only when bit 0 of the slice
       * mask is set. All four share the same condition, so they are
       * appended together, in the same order as before. ---- */
      if (perf->sys_vars.slice_mask & 0x1) {
         ctr = &query->counters[query->n_counters++];
         ctr->symbol_name = "L30Bank0Stalled";
         ctr->name = "Slice0 L3 Bank0 Stalled";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 240;
         ctr->oa_counter_read_float = sklgt2__l3_1__l30_bank0_stalled__read;

         ctr = &query->counters[query->n_counters++];
         ctr->symbol_name = "L30Bank1Stalled";
         ctr->name = "Slice0 L3 Bank1 Stalled";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 244;
         ctr->oa_counter_read_float = sklgt2__l3_1__l30_bank1_stalled__read;

         ctr = &query->counters[query->n_counters++];
         ctr->symbol_name = "L30Bank1Active";
         ctr->name = "Slice0 L3 Bank1 Active";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 248;
         ctr->oa_counter_read_float = sklgt2__l3_1__l30_bank1_active__read;

         ctr = &query->counters[query->n_counters++];
         ctr->symbol_name = "L30Bank0Active";
         ctr->name = "Slice0 L3 Bank0 Active";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 252;
         ctr->oa_counter_read_float = sklgt2__l3_1__l30_bank0_active__read;
      }

      /* ---- GTI: exposed only when query mode is enabled ---- */
      if (perf->sys_vars.query_mode) {
         ctr = &query->counters[query->n_counters++];
         ctr->symbol_name = "GTRequestQueueFull";
         ctr->name = "SQ is full";
         ctr->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         ctr->category = "GTI";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 256;
         ctr->oa_counter_read_float = sklgt2__l3_1__gt_request_queue_full__read;
      }

      /* The result buffer ends right after the last counter that was
       * actually appended. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set so it can be looked up by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "L3_2" OA metric set (sklgt2 variant) with `perf`.
 *
 * Allocates a query description with `perf` as its ralloc context, attaches
 * the mux / B-counter / flex register tables to query->config, appends the
 * counter descriptions, computes query->data_size and finally inserts the
 * query into perf->oa_metrics_table keyed by query->guid.
 *
 * Between 35 and 38 counters are registered: 35 unconditionally, two more
 * when bit 0 of perf->sys_vars.slice_mask is set, and one more when
 * perf->sys_vars.query_mode is non-zero.
 */
static void
sklgt2_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "f542479b-6685-4670-ad8e-9fe282a2eb5b";
   /* 38 is the maximum number of counters appended below (35 + 2 + 1). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* These are indices into results->accumulator[] (see the *__read helpers).
    * The chain below evaluates to: gpu_time 0, gpu_clock 1, a 2, b 38, c 46,
    * perfcnt 54, rpstat 56. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor for the counter being filled in; it is re-pointed at each newly
    * appended slot below and, at the end, identifies the last counter added. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was allocated with rzalloc() just above; if that
    * allocator zero-fills (as the name suggests), data_size is always 0 here
    * and this branch is always taken — confirm. */
   if (!query->data_size) {
      /* Register/value pairs handed to query->config.mux_regs. The table has
       * static storage, so the pointer stored below stays valid after this
       * function returns. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs handed to query->config.b_counter_regs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs handed to query->config.flex_regs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions follow, appended in order via n_counters++.
       * UINT64 counters sit on offsets that are multiples of 8; FLOAT
       * counters are spaced 4 apart, except for the gap after GpuBusy
       * (24 -> 32), which keeps the next UINT64 counter on a multiple of 8.
       * Presumably the offsets are byte positions in the query result
       * data — confirm against intel_perf_query_counter_get_size(). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time by a helper rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-stage thread dispatch counts (VS, HS, DS, GS, FS, CS). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EU utilisation percentages: FLOAT counters with raw_max 100.0,
       * occupying offsets 80..116. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Pixel / sample event counts: UINT64 counters at offsets 120..168. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      /* Sampler texel counts. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      /* L3 / data-port traffic and shader barrier counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      /* Last unconditional counter; if none of the conditional counters
       * below are registered, data_size is derived from this one. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* Registered only when bit 0 of the slice mask is set. */
      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      /* Same slice-mask condition as the previous counter. */
      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      /* The leading `true &&` has no effect; this counter is registered
       * only when perf->sys_vars.query_mode is non-zero. Its offset (248)
       * is fixed regardless of whether the two slice counters above were
       * registered. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      /* `counter` now points at the last counter actually registered, so
       * data_size is that counter's offset plus its size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "1cbf1e9f-c2de-4cc2-8d1c-2c33c25a2332";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "RasterizerAndPixelBackend" OA metric set (sklgt2 variant).
 *
 * Allocates a new intel_perf_query_info with `perf` as its ralloc context,
 * fills in its identification (name, symbol name, GUID), the OA report
 * format, the accumulator offsets, the mux / B-counter / flex register
 * programming tables and the list of counter descriptors, and finally
 * inserts the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * perf: configuration that owns the query. Its sys_vars.slice_mask and
 *       sys_vars.query_mode decide which optional counters get registered.
 *
 * This function is emitted by gen_perf.py (see the file header), so lasting
 * changes belong in the generator rather than in this file.
 */
static void
sklgt2_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "b4e54cc8-a3a0-4ccd-9990-65d3184e5a38";
   /* Capacity 41 = 35 counters registered unconditionally below, plus up to
    * 6 optional ones (5 gated on slice_mask bit 0, 1 gated on query_mode). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* These resolve to: gpu_time 0, gpu_clock 1, a 2, b 38, c 46,
    * perfcnt 54, rpstat 56. The *__read helpers in this file use them as
    * indices into results->accumulator[]. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor over the descriptor array; re-pointed at every newly claimed
    * slot below and read one last time when computing data_size. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was obtained from rzalloc() just above, so
    * data_size is expected to still be 0 here and this branch to run on
    * every call (assuming rzalloc zero-fills, see util/ralloc.h). */
   if (!query->data_size) {
      /* The three register tables below are `static const`, so they have
       * static storage duration and query->config may keep pointers to
       * them after this function returns. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Each one claims the next free slot through
       * n_counters++ and sets exactly one read callback matching its
       * data_type (oa_counter_read_uint64 or oa_counter_read_float).
       * `offset` is hard-coded per counter and strictly increases in
       * registration order; UINT64 counters sit on multiples of 8.
       * NOTE(review): offset looks like a byte position inside the query
       * result data (data_size below is offset + counter size) - confirm
       * against intel_perf.h. */

      /* --- Counters 1-35: always registered --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed by a helper
       * call rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* FLOAT counter at 24; the next (UINT64) counter is placed at 32,
       * not 28. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-stage thread dispatch counts (UINT64, offsets 32..72). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EU activity percentages (FLOAT, raw_max 100.0, offsets 80..116 in
       * steps of 4). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Pixel / sample / texel event counts (UINT64, offsets 120..184). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      /* Memory traffic and message counts (UINT64, offsets 192..232). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      /* Last unconditional counter: if none of the optional counters below
       * is registered, data_size is derived from this one. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* --- Counters 36-40: registered only when bit 0 of
       * perf->sys_vars.slice_mask is set. All five blocks test the same
       * condition. Their offsets (240..256) are fixed regardless of which
       * optional counters end up being registered. --- */
      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      /* --- Counter 41: registered only when perf->sys_vars.query_mode is
       * non-zero. The leading `true &&` does not affect the condition. --- */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      /* `counter` still points at the last descriptor registered above.
       * Offsets increase in registration order, so this is the end of the
       * highest-offset counter that was actually registered. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query discoverable by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "efe7eba4-fd9e-4a09-a92b-334970b5de57";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "e9bd0bbb-c7b7-4a26-a27a-246987354776";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt2__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Helpers private to sklgt2_register_tdl_2_counter_query().  Each one claims
 * the next unused entry of query->counters (advancing query->n_counters) and
 * fills in its description.  They intentionally refer to the `query` and
 * `ctr` locals of that function and are #undef'd right after it.
 *
 * Argument order: byte offset of the value, read callback, symbol name,
 * display name, category, [type, units, raw_max,] description. */

/* Counter read through oa_counter_read_uint64 and reported as UINT64. */
#define SKLGT2_TDL_2_U64(at_, reader_, sym_, label_, group_,                 \
                         kind_, unit_, max_, text_)                          \
   do {                                                                      \
      ctr = &query->counters[query->n_counters++];                           \
      ctr->symbol_name = (sym_);                                             \
      ctr->name = (label_);                                                  \
      ctr->category = (group_);                                              \
      ctr->desc = (text_);                                                   \
      ctr->type = (kind_);                                                   \
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;                  \
      ctr->units = (unit_);                                                  \
      ctr->raw_max = (max_);                                                 \
      ctr->offset = (at_);                                                   \
      ctr->oa_counter_read_uint64 = (reader_);                               \
   } while (0)

/* Counter read through oa_counter_read_float: a RAW percentage whose
 * raw_max is 100.0. */
#define SKLGT2_TDL_2_PCT(at_, reader_, sym_, label_, group_, text_)          \
   do {                                                                      \
      ctr = &query->counters[query->n_counters++];                           \
      ctr->symbol_name = (sym_);                                             \
      ctr->name = (label_);                                                  \
      ctr->category = (group_);                                              \
      ctr->desc = (text_);                                                   \
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;                               \
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;                   \
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;                         \
      ctr->raw_max = 100.0;                                                  \
      ctr->offset = (at_);                                                   \
      ctr->oa_counter_read_float = (reader_);                                \
   } while (0)

/*
 * Describes the "TDL_2" OA metric set and stores it in
 * perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The query object is allocated with rzalloc() using `perf` as its context
 * and the counter array is allocated using the query as context.  35
 * counters are always described; six thread-dispatcher counters depend on
 * bits of perf->sys_vars.subslice_mask and one GTI counter depends on
 * perf->sys_vars.query_mode, for at most 42 entries (the array capacity).
 *
 * Allocation results are not checked here.
 */
static void
sklgt2_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->symbol_name = "TDL_2";
   query->name = "Metric set TDL_2";
   query->guid = "0a7accbf-8c55-455f-9984-23e9fb0b826d";
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* resolved later at runtime, through sysfs */

   /* Capacity covers every counter this function can append. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);

   /* Accumulation buffer layout: every section begins right after the
    * previous one (sizes 1, 1, 36, 8, 8 and 2 entries respectively). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = 1 + query->gpu_time_offset;
   query->a_offset = 1 + query->gpu_clock_offset;
   query->b_offset = 36 + query->a_offset;
   query->c_offset = 8 + query->b_offset;
   query->perfcnt_offset = 8 + query->c_offset;
   query->rpstat_offset = 2 + query->perfcnt_offset;

   struct intel_perf_query_counter *ctr = query->counters;

   /* NOTE(review): the query was allocated just above, so data_size is
    * presumably still zero and this branch always runs -- confirm that
    * rzalloc() zero-fills. */
   if (!query->data_size) {
      /* Register/value pairs attached to query->config below. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x000000A0 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x000091BC, .val = 0xE0500000 },
      };
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };

      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* --- Counters that are always present, in result-buffer order --- */

      SKLGT2_TDL_2_U64(0, sklgt2__tdl_2__gpu_time__read,
                       "GpuTime", "GPU Time Elapsed", "GPU",
                       INTEL_PERF_COUNTER_TYPE_RAW,
                       INTEL_PERF_COUNTER_UNITS_NS,
                       0 /* undefined */,
                       "Time elapsed on the GPU during the measurement. Unit: ns.");

      SKLGT2_TDL_2_U64(8, sklgt2__tdl_2__gpu_core_clocks__read,
                       "GpuCoreClocks", "GPU Core Clocks", "GPU",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_CYCLES,
                       0 /* undefined */,
                       "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.");

      SKLGT2_TDL_2_U64(16, sklgt2__tdl_2__avg_gpu_core_frequency__read,
                       "AvgGpuCoreFrequency", "AVG GPU Core Frequency", "GPU",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_HZ,
                       sklgt2__tdl_2__avg_gpu_core_frequency__max(perf),
                       "Average GPU Core Frequency in the measurement. Unit: Hz.");

      SKLGT2_TDL_2_PCT(24, sklgt2__tdl_2__gpu_busy__read,
                       "GpuBusy", "GPU Busy", "GPU",
                       "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.");

      SKLGT2_TDL_2_U64(32, sklgt2__tdl_2__vs_threads__read,
                       "VsThreads", "VS Threads Dispatched", "EU Array/Vertex Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of vertex shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_U64(40, sklgt2__tdl_2__hs_threads__read,
                       "HsThreads", "HS Threads Dispatched", "EU Array/Hull Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of hull shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_U64(48, sklgt2__tdl_2__ds_threads__read,
                       "DsThreads", "DS Threads Dispatched", "EU Array/Domain Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of domain shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_U64(56, sklgt2__tdl_2__gs_threads__read,
                       "GsThreads", "GS Threads Dispatched", "EU Array/Geometry Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of geometry shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_U64(64, sklgt2__tdl_2__ps_threads__read,
                       "PsThreads", "FS Threads Dispatched", "EU Array/Fragment Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of fragment shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_U64(72, sklgt2__tdl_2__cs_threads__read,
                       "CsThreads", "CS Threads Dispatched", "EU Array/Compute Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_THREADS,
                       0 /* undefined */,
                       "The total number of compute shader hardware threads dispatched. Unit: threads.");

      SKLGT2_TDL_2_PCT(80, sklgt2__tdl_2__eu_active__read,
                       "EuActive", "EU Active", "EU Array",
                       "The percentage of time in which the Execution Units were actively processing. Unit: percent.");

      SKLGT2_TDL_2_PCT(84, sklgt2__tdl_2__eu_stall__read,
                       "EuStall", "EU Stall", "EU Array",
                       "The percentage of time in which the Execution Units were stalled. Unit: percent.");

      SKLGT2_TDL_2_PCT(88, sklgt2__tdl_2__eu_fpu_both_active__read,
                       "EuFpuBothActive", "EU Both FPU Pipes Active", "EU Array/Pipes",
                       "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.");

      SKLGT2_TDL_2_PCT(92, sklgt2__tdl_2__vs_fpu0_active__read,
                       "VsFpu0Active", "VS FPU0 Pipe Active", "EU Array/Vertex Shader",
                       "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(96, sklgt2__tdl_2__vs_fpu1_active__read,
                       "VsFpu1Active", "VS FPU1 Pipe Active", "EU Array/Vertex Shader",
                       "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(100, sklgt2__tdl_2__vs_send_active__read,
                       "VsSendActive", "VS Send Pipe Active", "EU Array/Vertex Shader",
                       "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(104, sklgt2__tdl_2__ps_fpu0_active__read,
                       "PsFpu0Active", "PS FPU0 Pipe Active", "EU Array/Pixel Shader",
                       "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(108, sklgt2__tdl_2__ps_fpu1_active__read,
                       "PsFpu1Active", "PS FPU1 Pipe Active", "EU Array/Pixel Shader",
                       "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(112, sklgt2__tdl_2__ps_send_active__read,
                       "PsSendActive", "PS Send Pipeline Active", "EU Array/Pixel Shader",
                       "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.");

      SKLGT2_TDL_2_PCT(116, sklgt2__tdl_2__ps_eu_both_fpu_active__read,
                       "PsEuBothFpuActive", "FS Both FPU Active", "3D Pipe/Fragment Shader",
                       "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.");

      SKLGT2_TDL_2_U64(120, sklgt2__tdl_2__rasterized_pixels__read,
                       "RasterizedPixels", "Rasterized Pixels", "3D Pipe/Rasterizer",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of rasterized pixels. Unit: pixels.");

      SKLGT2_TDL_2_U64(128, sklgt2__tdl_2__hi_depth_test_fails__read,
                       "HiDepthTestFails", "Early Hi-Depth Test Fails", "3D Pipe/Rasterizer/Hi-Depth Test",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.");

      SKLGT2_TDL_2_U64(136, sklgt2__tdl_2__early_depth_test_fails__read,
                       "EarlyDepthTestFails", "Early Depth Test Fails", "3D Pipe/Rasterizer/Early Depth Test",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of pixels dropped on early depth test. Unit: pixels.");

      SKLGT2_TDL_2_U64(144, sklgt2__tdl_2__samples_killed_in_ps__read,
                       "SamplesKilledInPs", "Samples Killed in FS", "3D Pipe/Fragment Shader",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.");

      SKLGT2_TDL_2_U64(152, sklgt2__tdl_2__pixels_failing_post_ps_tests__read,
                       "PixelsFailingPostPsTests", "Pixels Failing Tests", "3D Pipe/Output Merger",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.");

      SKLGT2_TDL_2_U64(160, sklgt2__tdl_2__samples_written__read,
                       "SamplesWritten", "Samples Written", "3D Pipe/Output Merger",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of samples or pixels written to all render targets. Unit: pixels.");

      SKLGT2_TDL_2_U64(168, sklgt2__tdl_2__samples_blended__read,
                       "SamplesBlended", "Samples Blended", "3D Pipe/Output Merger",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_PIXELS,
                       0 /* undefined */,
                       "The total number of blended samples or pixels written to all render targets. Unit: pixels.");

      SKLGT2_TDL_2_U64(176, sklgt2__tdl_2__sampler_texels__read,
                       "SamplerTexels", "Sampler Texels", "Sampler/Sampler Input",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_TEXELS,
                       0 /* undefined */,
                       "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.");

      SKLGT2_TDL_2_U64(184, sklgt2__tdl_2__sampler_texel_misses__read,
                       "SamplerTexelMisses", "Sampler Texels Misses", "Sampler/Sampler Cache",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_TEXELS,
                       0 /* undefined */,
                       "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.");

      SKLGT2_TDL_2_U64(192, sklgt2__tdl_2__slm_bytes_read__read,
                       "SlmBytesRead", "SLM Bytes Read", "L3/Data Port/SLM",
                       INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                       INTEL_PERF_COUNTER_UNITS_BYTES,
                       0 /* unsupported (varies over time) */,
                       "The total number of GPU memory bytes read from shared local memory. Unit: bytes.");

      SKLGT2_TDL_2_U64(200, sklgt2__tdl_2__slm_bytes_written__read,
                       "SlmBytesWritten", "SLM Bytes Written", "L3/Data Port/SLM",
                       INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                       INTEL_PERF_COUNTER_UNITS_BYTES,
                       0 /* unsupported (varies over time) */,
                       "The total number of GPU memory bytes written into shared local memory. Unit: bytes.");

      SKLGT2_TDL_2_U64(208, sklgt2__tdl_2__shader_memory_accesses__read,
                       "ShaderMemoryAccesses", "Shader Memory Accesses", "L3/Data Port",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_MESSAGES,
                       0 /* undefined */,
                       "The total number of shader memory accesses to L3. Unit: messages.");

      SKLGT2_TDL_2_U64(216, sklgt2__tdl_2__shader_atomics__read,
                       "ShaderAtomics", "Shader Atomic Memory Accesses", "L3/Data Port/Atomics",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_MESSAGES,
                       0 /* undefined */,
                       "The total number of shader atomic memory accesses. Unit: messages.");

      SKLGT2_TDL_2_U64(224, sklgt2__tdl_2__l3_shader_throughput__read,
                       "L3ShaderThroughput", "L3 Shader Throughput", "L3/Data Port",
                       INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                       INTEL_PERF_COUNTER_UNITS_BYTES,
                       0 /* unsupported (varies over time) */,
                       "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.");

      SKLGT2_TDL_2_U64(232, sklgt2__tdl_2__shader_barriers__read,
                       "ShaderBarriers", "Shader Barrier Messages", "EU Array/Barrier",
                       INTEL_PERF_COUNTER_TYPE_EVENT,
                       INTEL_PERF_COUNTER_UNITS_MESSAGES,
                       0 /* undefined */,
                       "The total number of shader barrier messages. Unit: messages.");

      /* --- Counters gated on perf->sys_vars.subslice_mask bits --- */

      if (perf->sys_vars.subslice_mask & 0x2) {
         SKLGT2_TDL_2_PCT(240, sklgt2__tdl_2__thread_header01_ready_port0__read,
                          "ThreadHeader01ReadyPort0",
                          "Thread Header Ready on Slice0 Subslice1 Port 0",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.");
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         SKLGT2_TDL_2_PCT(244, sklgt2__tdl_2__thread_header00_ready_port1__read,
                          "ThreadHeader00ReadyPort1",
                          "Thread Header Ready on Slice0 Subslice0 Port 1",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.");

         SKLGT2_TDL_2_PCT(248, sklgt2__tdl_2__thread_header00_ready_port0__read,
                          "ThreadHeader00ReadyPort0",
                          "Thread Header Ready on Slice0 Subslice0 Port 0",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.");
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         SKLGT2_TDL_2_PCT(252, sklgt2__tdl_2__thread_header02_ready_port1__read,
                          "ThreadHeader02ReadyPort1",
                          "Thread Header Ready on Slice0 Subslice2 Port 1",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.");

         SKLGT2_TDL_2_PCT(256, sklgt2__tdl_2__thread_header02_ready_port0__read,
                          "ThreadHeader02ReadyPort0",
                          "Thread Header Ready on Slice0 Subslice2 Port 0",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.");
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         SKLGT2_TDL_2_PCT(260, sklgt2__tdl_2__thread_header01_ready_port1__read,
                          "ThreadHeader01ReadyPort1",
                          "Thread Header Ready on Slice0 Subslice1 Port 1",
                          "GPU/Thread Dispatcher",
                          "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.");
      }

      /* --- Counter gated on perf->sys_vars.query_mode --- */

      if (perf->sys_vars.query_mode) {
         SKLGT2_TDL_2_PCT(264, sklgt2__tdl_2__gt_request_queue_full__read,
                          "GTRequestQueueFull", "SQ is full", "GTI",
                          "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.");
      }

      /* `ctr` now points at the last counter that was appended; the result
       * size is where that counter's value ends. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef SKLGT2_TDL_2_PCT
#undef SKLGT2_TDL_2_U64


static void
sklgt2_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "41a24047-7484-4ead-ae37-de907e5ff2b2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0E0F006C },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x1190E000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C00 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00001000 },
         { .reg = 0x0000E558, .val = 0x00003002 },
         { .reg = 0x0000E658, .val = 0x00005004 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00050012 },
         { .reg = 0x0000E55C, .val = 0x00052051 },
         { .reg = 0x0000E65C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "95910492-943f-44bd-9461-390240f243fd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x143A5800 },
         { .reg = 0x00009888, .val = 0x163A00C0 },
         { .reg = 0x00009888, .val = 0x12380240 },
         { .reg = 0x00009888, .val = 0x14380002 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C1500 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F9500 },
         { .reg = 0x00009888, .val = 0x100F002A },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x0A2DC000 },
         { .reg = 0x00009888, .val = 0x0C2DC000 },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x06393000 },
         { .reg = 0x00009888, .val = 0x0C3A28C1 },
         { .reg = 0x00009888, .val = 0x003A0000 },
         { .reg = 0x00009888, .val = 0x0A33F000 },
         { .reg = 0x00009888, .val = 0x0C33F000 },
         { .reg = 0x00009888, .val = 0x0A37A000 },
         { .reg = 0x00009888, .val = 0x0C37A000 },
         { .reg = 0x00009888, .val = 0x0A380977 },
         { .reg = 0x00009888, .val = 0x08380000 },
         { .reg = 0x00009888, .val = 0x04380000 },
         { .reg = 0x00009888, .val = 0x06383000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900800 },
         { .reg = 0x00009888, .val = 0x47901000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900844 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "c44a5cf7-886d-477b-bebd-2d738923e4c3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 8);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C00 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x21C05800 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x07960025 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x1B930062 },
         { .reg = 0x00009888, .val = 0x17948000 },
         { .reg = 0x00009888, .val = 0x1B940008 },
         { .reg = 0x00009888, .val = 0x05950075 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x0BC000A5 },
         { .reg = 0x00009888, .val = 0x11C00000 },
         { .reg = 0x00009888, .val = 0x05C00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C60 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00078000 },
         { .reg = 0x00002774, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_ff_bottlenecks_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "FF Bottlenecks";
   query->symbol_name = "FfBottlenecks";
   query->guid = "e0b2aae4-9b8f-4211-8df0-f8e8cb203209";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x19867C00 },
         { .reg = 0x00009888, .val = 0x01870000 },
         { .reg = 0x00009888, .val = 0x15880000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0B898000 },
         { .reg = 0x00009888, .val = 0x0D890C00 },
         { .reg = 0x00009888, .val = 0x0D808000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A2000 },
         { .reg = 0x00009888, .val = 0x1D8A0003 },
         { .reg = 0x00009888, .val = 0x1F810002 },
         { .reg = 0x00009888, .val = 0x0982C000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x05833000 },
         { .reg = 0x00009888, .val = 0x07831000 },
         { .reg = 0x00009888, .val = 0x0D848000 },
         { .reg = 0x00009888, .val = 0x0D850080 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x09860080 },
         { .reg = 0x00009888, .val = 0x0D860080 },
         { .reg = 0x00009888, .val = 0x07870C80 },
         { .reg = 0x00009888, .val = 0x0B870C80 },
         { .reg = 0x00009888, .val = 0x01880032 },
         { .reg = 0x00009888, .val = 0x0B888032 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x07888000 },
         { .reg = 0x00009888, .val = 0x1D930010 },
         { .reg = 0x00009888, .val = 0x1D950013 },
         { .reg = 0x00009888, .val = 0x0DE58000 },
         { .reg = 0x00009888, .val = 0x05924000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D92C000 },
         { .reg = 0x00009888, .val = 0x0F92C000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x01981000 },
         { .reg = 0x00009888, .val = 0x07984000 },
         { .reg = 0x00009888, .val = 0x0998A000 },
         { .reg = 0x00009888, .val = 0x0B985000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900020 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x47900400 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00000011 },
         { .reg = 0x0000E758, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__ff_bottlenecks__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__ff_bottlenecks__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__vs_cl_bypass__read;
      counter->name = "VS is sending vertices to CL";
      counter->desc = "The percentage of time in which VS is sending vertices to CL bypassing other pipeline stages. Unit: percent.";
      counter->symbol_name = "VsClBypass";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__te_bottleneck__read;
      counter->name = "TE Bottleneck";
      counter->desc = "The percentage of time in which tesselation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "TeBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 180;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__ff_bottlenecks__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 184;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the SKL GT2 "MDAPI testing set" (TestOa) OA metric set.
 *
 * Allocates a query description with rzalloc(perf, ...), fills in its
 * identity (name, symbol name, guid), the OA report format, the
 * accumulator offsets, the mux / B-counter / flex register programming
 * tables and 12 UINT64 counters (GPU time, core clocks, average core
 * frequency and TestCounter0..8), then inserts the query into
 * perf->oa_metrics_table keyed by its guid.
 */
static void
sklgt2_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced from query->config below. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x11810000 },
      { .reg = 0x00009888, .val = 0x07810016 },
      { .reg = 0x00009888, .val = 0x1F810000 },
      { .reg = 0x00009888, .val = 0x1D810000 },
      { .reg = 0x00009888, .val = 0x1B930040 },
      { .reg = 0x00009888, .val = 0x07E54000 },
      { .reg = 0x00009888, .val = 0x1F908000 },
      { .reg = 0x00009888, .val = 0x11900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x45900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002770, .val = 0x00000004 },
      { .reg = 0x00002774, .val = 0x00000000 },
      { .reg = 0x00002778, .val = 0x00000003 },
      { .reg = 0x0000277C, .val = 0x00000000 },
      { .reg = 0x00002780, .val = 0x00000007 },
      { .reg = 0x00002784, .val = 0x00000000 },
      { .reg = 0x00002788, .val = 0x00100002 },
      { .reg = 0x0000278C, .val = 0x0000FFF7 },
      { .reg = 0x00002790, .val = 0x00100002 },
      { .reg = 0x00002794, .val = 0x0000FFCF },
      { .reg = 0x00002798, .val = 0x00100082 },
      { .reg = 0x0000279C, .val = 0x0000FFEF },
      { .reg = 0x000027A0, .val = 0x001000C2 },
      { .reg = 0x000027A4, .val = 0x0000FFE7 },
      { .reg = 0x000027A8, .val = 0x00100001 },
      { .reg = 0x000027AC, .val = 0x0000FFE7 },
   };
   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E65C, .val = 0x00222222 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "c5384091-ce4c-406c-ab4d-4c530f71c7ae";
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;

   /* Storage for the 12 counters appended below. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);

   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Accumulation buffer layout: each group starts right after the
    * previous one (1 time slot, 1 clock slot, 36 A, 8 B, 8 C, 2 perfcnt). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* data_size has not been assigned since the allocation above.
    * NOTE(review): assuming rzalloc zero-fills, this branch always runs
    * for a freshly allocated query -- confirm against util/ralloc.h. */
   if (query->data_size == 0) {
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      query->config.mux_regs = mux_regs;

      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);
      query->config.b_counter_regs = b_counter_regs;

      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);
      query->config.flex_regs = flex_regs;

      /* --- GPU timing / clock counters ------------------------------- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__avg_gpu_core_frequency__read;
      ctr->raw_max = sklgt2__test_oa__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* --- HW test counters 0..8 (UINT64, 8 bytes apart) -------------- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter0";
      ctr->name = "TestCounter0";
      ctr->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter0__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 24;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter1";
      ctr->name = "TestCounter1";
      ctr->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter1__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter2";
      ctr->name = "TestCounter2";
      ctr->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter2__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter3";
      ctr->name = "TestCounter3";
      ctr->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter3__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter4";
      ctr->name = "TestCounter4";
      ctr->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter4__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter5";
      ctr->name = "TestCounter5";
      ctr->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter5__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter6";
      ctr->name = "TestCounter6";
      ctr->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter6__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter7";
      ctr->name = "TestCounter7";
      ctr->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter7__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter8";
      ctr->name = "TestCounter8";
      ctr->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = sklgt2__test_oa__counter8__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 88;

      /* Total result size: end of the last counter registered above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the "PMA_Stall" OA metric set for sklgt2 and insert it into
 * perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The query is allocated with perf as its ralloc context and the counter
 * array (4 slots) with the query as its context.  Three counters are always
 * described; the fourth (StcPMAStall) is only added when bit 0 of
 * perf->sys_vars.slice_mask is set.  data_size is derived from whichever
 * counter was described last.
 */
static void
sklgt2_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query =
      rzalloc(perf, struct intel_perf_query_info);
   struct intel_perf_query_counter *ctr;

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->guid = "27871149-2fa9-40ba-aa73-350d60c03a09";
   query->symbol_name = "PMA_Stall";
   query->name = "Metric set PMA Stall";

   /* Counter storage; entries are handed out one by one below. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);

   /* The metrics set id is determined at runtime, via sysfs. */
   query->oa_metrics_set_id = 0;
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: every offset is derived from the one
    * before it.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = 1 + query->gpu_time_offset;
   query->a_offset = 1 + query->gpu_clock_offset;
   query->b_offset = 36 + query->a_offset;
   query->c_offset = 8 + query->b_offset;
   query->perfcnt_offset = 8 + query->c_offset;
   query->rpstat_offset = 2 + query->perfcnt_offset;

   ctr = query->counters;

   /* Describe the register programming and the counters only while
    * data_size is still unset.
    * NOTE(review): query was just obtained from rzalloc(), so this is
    * presumably always true here -- confirm.
    */
   if (query->data_size == 0) {
      static const struct intel_perf_query_register_prog pma_stall_mux[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x122D3080 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0800 },
         { .reg = 0x00009888, .val = 0x0E0FAA00 },
         { .reg = 0x00009888, .val = 0x100F0002 },
         { .reg = 0x00009888, .val = 0x002D0025 },
         { .reg = 0x00009888, .val = 0x062D1300 },
         { .reg = 0x00009888, .val = 0x082D16A4 },
         { .reg = 0x00009888, .val = 0x0A2D162E },
         { .reg = 0x00009888, .val = 0x102D0000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      static const struct intel_perf_query_register_prog pma_stall_b_counters[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00E00021 },
         { .reg = 0x00002774, .val = 0x0007FFF8 },
         { .reg = 0x00002778, .val = 0x07000101 },
         { .reg = 0x0000277C, .val = 0x0038FFC7 },
      };

      query->config.n_mux_regs = ARRAY_SIZE(pma_stall_mux);
      query->config.mux_regs = pma_stall_mux;

      query->config.n_b_counter_regs = ARRAY_SIZE(pma_stall_b_counters);
      query->config.b_counter_regs = pma_stall_b_counters;

      /* Counters that are always present. */
      ctr = query->counters + query->n_counters++;
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = sklgt2__pma__stall__gpu_time__read;

      ctr = query->counters + query->n_counters++;
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = sklgt2__pma__stall__gpu_core_clocks__read;

      ctr = query->counters + query->n_counters++;
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = sklgt2__pma__stall__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = sklgt2__pma__stall__avg_gpu_core_frequency__read;

      /* This counter is only described when bit 0 of the slice mask is set. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         ctr = query->counters + query->n_counters++;
         ctr->name = "STC PMA stall";
         ctr->symbol_name = "StcPMAStall";
         ctr->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         ctr->category = "GPU/Stencil Cache";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 24;
         ctr->oa_counter_read_float = sklgt2__pma__stall__stc_pma_stall__read;
      }

      /* ctr is the last counter described above; the result size is its
       * offset plus its own size.
       */
      query->data_size = intel_perf_query_counter_get_size(ctr) + ctr->offset;
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt2_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute";
   query->symbol_name = "AsyncCompute";
   query->guid = "04769cac-6809-4f2a-af5d-40022435a8b2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00001000 },
         { .reg = 0x0000E658, .val = 0x00051050 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00061060 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt2__async_compute__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt2__async_compute__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__cs_fpu0_active__read;
      counter->name = "CS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu0Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__cs_fpu1_active__read;
      counter->name = "CS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu1Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt2__async_compute__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_sklgt2(struct intel_perf_config *perf)
{
   sklgt2_register_render_basic_counter_query(perf);
   sklgt2_register_compute_basic_counter_query(perf);
   sklgt2_register_render_pipe_profile_counter_query(perf);
   sklgt2_register_memory_reads_counter_query(perf);
   sklgt2_register_memory_writes_counter_query(perf);
   sklgt2_register_compute_extended_counter_query(perf);
   sklgt2_register_compute_l3_cache_counter_query(perf);
   sklgt2_register_hdc_and_sf_counter_query(perf);
   sklgt2_register_l3_1_counter_query(perf);
   sklgt2_register_l3_2_counter_query(perf);
   sklgt2_register_l3_3_counter_query(perf);
   sklgt2_register_rasterizer_and_pixel_backend_counter_query(perf);
   sklgt2_register_sampler_counter_query(perf);
   sklgt2_register_tdl_1_counter_query(perf);
   sklgt2_register_tdl_2_counter_query(perf);
   sklgt2_register_compute_extra_counter_query(perf);
   sklgt2_register_vme_pipe_counter_query(perf);
   sklgt2_register_gpu_busyness_counter_query(perf);
   sklgt2_register_ff_bottlenecks_counter_query(perf);
   sklgt2_register_test_oa_counter_query(perf);
   sklgt2_register_pma__stall_counter_query(perf);
   sklgt2_register_async_compute_counter_query(perf);
}


static void
sklgt3_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "21fef15a-83f4-4ffa-bb81-7da6e38b8e4b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x16EC01E0 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E0380 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8400 },
         { .reg = 0x00009888, .val = 0x0C4C0002 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F6600 },
         { .reg = 0x00009888, .val = 0x100F0001 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162CA200 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x082D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x0833C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x08370840 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x1ACE0200 },
         { .reg = 0x00009888, .val = 0x0AEC5300 },
         { .reg = 0x00009888, .val = 0x10EC0000 },
         { .reg = 0x00009888, .val = 0x1CEC0000 },
         { .reg = 0x00009888, .val = 0x0A9B8000 },
         { .reg = 0x00009888, .val = 0x1C9C0002 },
         { .reg = 0x00009888, .val = 0x0CCC0002 },
         { .reg = 0x00009888, .val = 0x0A8D8000 },
         { .reg = 0x00009888, .val = 0x108F0001 },
         { .reg = 0x00009888, .val = 0x16AC8000 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51907710 },
         { .reg = 0x00009888, .val = 0x419020A0 },
         { .reg = 0x00009888, .val = 0x55901515 },
         { .reg = 0x00009888, .val = 0x45900529 },
         { .reg = 0x00009888, .val = 0x47901025 },
         { .reg = 0x00009888, .val = 0x57907770 },
         { .reg = 0x00009888, .val = 0x49902100 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900108 },
         { .reg = 0x00009888, .val = 0x59900007 },
         { .reg = 0x00009888, .val = 0x43902108 },
         { .reg = 0x00009888, .val = 0x53907777 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "4320492b-fd03-42ac-922f-dbe1ef3b7b58";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900863 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900C62 },
         { .reg = 0x00009888, .val = 0x53903333 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "bd2d9cae-b9ec-4f5b-9d2f-934bed398a2d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

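   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK for a query's description to
    * be initialized only once.  The query here is freshly zero-allocated
    * by rzalloc() above rather than held in a global variable, so
    * data_size is still 0 when the check below is reached. */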

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51901150 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x55905111 },
         { .reg = 0x00009888, .val = 0x45901400 },
         { .reg = 0x00009888, .val = 0x479004A5 },
         { .reg = 0x00009888, .val = 0x57903455 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B9000A0 },
         { .reg = 0x00009888, .val = 0x59900001 },
         { .reg = 0x00009888, .val = 0x43900005 },
         { .reg = 0x00009888, .val = 0x53900455 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "4ca0f3fe-7fd3-4924-98cb-1807d9879767";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "a0c0172c-ee13-403d-99ff-2bdf6936cf14";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "52435e0b-f188-42ea-8680-21a56ee20dee";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "27076eeb-49f3-4fed-8423-c66506005c63";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900063 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53903333 },
         { .reg = 0x00009888, .val = 0x43900840 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "a1b8363b-b986-47b6-929a-847afde3dec8";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

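   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe each query just
    * once.  The query itself is allocated per call with rzalloc() above
    * (it is not a global variable); only the register tables below are
    * static.  NOTE(review): data_size is presumably still zero at this
    * point, so the check below would always pass -- confirm. */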

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900005 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "fa857069-7f95-44c9-a3e0-cb8ccbc35abb";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "9830a5f3-a5e2-4773-a49d-6371407c382a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "59452ceb-24bb-4878-87ec-87aab7b4f9db";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "d2249ff7-34b4-4203-b474-7c11546f9dae";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "c9cf7c63-0065-4226-941e-98590bad8f75";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "e3bb1501-89ca-43fb-8668-9d96159e10a2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "29194a55-7e75-4152-a71f-ef34b1ae4cca";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the register programming and counter
    * descriptions below are only filled in while data_size is still unset. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt3__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "2d80a648-7b5a-4e92-bbe7-3b5c76f2e221";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x129203E0 },
         { .reg = 0x00009888, .val = 0x12B203E0 },
         { .reg = 0x00009888, .val = 0x12D203E0 },
         { .reg = 0x00009888, .val = 0x024EC000 },
         { .reg = 0x00009888, .val = 0x044EC000 },
         { .reg = 0x00009888, .val = 0x064EC000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C0042 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F006D },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x00AF8000 },
         { .reg = 0x00009888, .val = 0x0ACC0001 },
         { .reg = 0x00009888, .val = 0x008D8000 },
         { .reg = 0x00009888, .val = 0x028DA000 },
         { .reg = 0x00009888, .val = 0x0C8FB000 },
         { .reg = 0x00009888, .val = 0x0E8F0001 },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x02AD4000 },
         { .reg = 0x00009888, .val = 0x02908000 },
         { .reg = 0x00009888, .val = 0x02918000 },
         { .reg = 0x00009888, .val = 0x02921980 },
         { .reg = 0x00009888, .val = 0x00920000 },
         { .reg = 0x00009888, .val = 0x02934000 },
         { .reg = 0x00009888, .val = 0x02B04000 },
         { .reg = 0x00009888, .val = 0x02B14000 },
         { .reg = 0x00009888, .val = 0x02B20033 },
         { .reg = 0x00009888, .val = 0x00B20000 },
         { .reg = 0x00009888, .val = 0x02B31000 },
         { .reg = 0x00009888, .val = 0x00D08000 },
         { .reg = 0x00009888, .val = 0x00D18000 },
         { .reg = 0x00009888, .val = 0x00D21980 },
         { .reg = 0x00009888, .val = 0x00D34000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900402 },
         { .reg = 0x00009888, .val = 0x53901550 },
         { .reg = 0x00009888, .val = 0x45900080 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "Media Vme Pipe metrics set" OA query (symbol VMEPipe) and
 * publish it in perf->oa_metrics_table, keyed by the query's GUID string.
 *
 * The query object is obtained from rzalloc() with perf as the context, and
 * its ten-entry counter array from rzalloc_array() with the query as the
 * context.  Three register programs (mux, B-counter, flex) are attached to
 * query->config.  Nothing is returned; the query is reachable afterwards
 * only through the hash table.
 */
static void
sklgt3_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs installed below as query->config.mux_regs. */
   static const struct intel_perf_query_register_prog vme_pipe_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x141A5800 },
      { .reg = 0x00009888, .val = 0x161A00C0 },
      { .reg = 0x00009888, .val = 0x12180240 },
      { .reg = 0x00009888, .val = 0x14180002 },
      { .reg = 0x00009888, .val = 0x149A5800 },
      { .reg = 0x00009888, .val = 0x169A00C0 },
      { .reg = 0x00009888, .val = 0x12980240 },
      { .reg = 0x00009888, .val = 0x14980002 },
      { .reg = 0x00009888, .val = 0x1A4E3FC0 },
      { .reg = 0x00009888, .val = 0x002F1000 },
      { .reg = 0x00009888, .val = 0x022F8000 },
      { .reg = 0x00009888, .val = 0x042F3000 },
      { .reg = 0x00009888, .val = 0x004C4000 },
      { .reg = 0x00009888, .val = 0x0A4C9500 },
      { .reg = 0x00009888, .val = 0x0C4C002A },
      { .reg = 0x00009888, .val = 0x000D2000 },
      { .reg = 0x00009888, .val = 0x060D8000 },
      { .reg = 0x00009888, .val = 0x080DA000 },
      { .reg = 0x00009888, .val = 0x0A0DA000 },
      { .reg = 0x00009888, .val = 0x0C0DA000 },
      { .reg = 0x00009888, .val = 0x0C0F0400 },
      { .reg = 0x00009888, .val = 0x0E0F5500 },
      { .reg = 0x00009888, .val = 0x100F0015 },
      { .reg = 0x00009888, .val = 0x002C8000 },
      { .reg = 0x00009888, .val = 0x0E2C8000 },
      { .reg = 0x00009888, .val = 0x162CAA00 },
      { .reg = 0x00009888, .val = 0x182C000A },
      { .reg = 0x00009888, .val = 0x04193000 },
      { .reg = 0x00009888, .val = 0x081A28C1 },
      { .reg = 0x00009888, .val = 0x001A0000 },
      { .reg = 0x00009888, .val = 0x00133000 },
      { .reg = 0x00009888, .val = 0x0613C000 },
      { .reg = 0x00009888, .val = 0x0813F000 },
      { .reg = 0x00009888, .val = 0x00172000 },
      { .reg = 0x00009888, .val = 0x06178000 },
      { .reg = 0x00009888, .val = 0x0817A000 },
      { .reg = 0x00009888, .val = 0x00180037 },
      { .reg = 0x00009888, .val = 0x06180940 },
      { .reg = 0x00009888, .val = 0x08180000 },
      { .reg = 0x00009888, .val = 0x02180000 },
      { .reg = 0x00009888, .val = 0x04183000 },
      { .reg = 0x00009888, .val = 0x04AFC000 },
      { .reg = 0x00009888, .val = 0x06AF3000 },
      { .reg = 0x00009888, .val = 0x0ACC4000 },
      { .reg = 0x00009888, .val = 0x0CCC0015 },
      { .reg = 0x00009888, .val = 0x0A8DA000 },
      { .reg = 0x00009888, .val = 0x0C8DA000 },
      { .reg = 0x00009888, .val = 0x0E8F4000 },
      { .reg = 0x00009888, .val = 0x108F0015 },
      { .reg = 0x00009888, .val = 0x16ACA000 },
      { .reg = 0x00009888, .val = 0x18AC000A },
      { .reg = 0x00009888, .val = 0x06993000 },
      { .reg = 0x00009888, .val = 0x0C9A28C1 },
      { .reg = 0x00009888, .val = 0x009A0000 },
      { .reg = 0x00009888, .val = 0x0A93F000 },
      { .reg = 0x00009888, .val = 0x0C93F000 },
      { .reg = 0x00009888, .val = 0x0A97A000 },
      { .reg = 0x00009888, .val = 0x0C97A000 },
      { .reg = 0x00009888, .val = 0x0A980977 },
      { .reg = 0x00009888, .val = 0x08980000 },
      { .reg = 0x00009888, .val = 0x04980000 },
      { .reg = 0x00009888, .val = 0x06983000 },
      { .reg = 0x00009888, .val = 0x119000FF },
      { .reg = 0x00009888, .val = 0x51900050 },
      { .reg = 0x00009888, .val = 0x41900000 },
      { .reg = 0x00009888, .val = 0x55900115 },
      { .reg = 0x00009888, .val = 0x45900000 },
      { .reg = 0x00009888, .val = 0x47900884 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900002 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   /* Register/value pairs installed below as query->config.b_counter_regs. */
   static const struct intel_perf_query_register_prog vme_pipe_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x30800000 },
      { .reg = 0x00002770, .val = 0x00100030 },
      { .reg = 0x00002774, .val = 0x0000FFF9 },
      { .reg = 0x00002778, .val = 0x00000002 },
      { .reg = 0x0000277C, .val = 0x0000FFFC },
      { .reg = 0x00002780, .val = 0x00000002 },
      { .reg = 0x00002784, .val = 0x0000FFF3 },
      { .reg = 0x00002788, .val = 0x00100180 },
      { .reg = 0x0000278C, .val = 0x0000FFCF },
      { .reg = 0x00002790, .val = 0x00000002 },
      { .reg = 0x00002794, .val = 0x0000FFCF },
      { .reg = 0x00002798, .val = 0x00000002 },
      { .reg = 0x0000279C, .val = 0x0000FF3F },
   };

   /* Register/value pairs installed below as query->config.flex_regs. */
   static const struct intel_perf_query_register_prog vme_pipe_flex[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00008003 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "cfae9232-6ffc-42cc-a703-9790016925f0";

   /* Storage for the ten counters registered below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each section starts where the
    * previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The description is only filled in while data_size is still zero.
    * NOTE(review): query was just obtained from rzalloc(), which presumably
    * zero-fills, so this guard looks always-true here -- confirm before
    * relying on it. */
   if (query->data_size == 0) {
      query->config.mux_regs = vme_pipe_mux;
      query->config.n_mux_regs = ARRAY_SIZE(vme_pipe_mux);

      query->config.b_counter_regs = vme_pipe_b_counters;
      query->config.n_b_counter_regs = ARRAY_SIZE(vme_pipe_b_counters);

      query->config.flex_regs = vme_pipe_flex;
      query->config.n_flex_regs = ARRAY_SIZE(vme_pipe_flex);

      /* GpuTime */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0; /* undefined */
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = sklgt3__vme_pipe__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0; /* undefined */
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = sklgt3__vme_pipe__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here whose raw_max is
       * obtained from a helper rather than a literal. */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = sklgt3__vme_pipe__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = sklgt3__vme_pipe__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__gpu_busy__read;

      /* CsThreads */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0; /* undefined */
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = sklgt3__vme_pipe__cs_threads__read;

      /* EuActive */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 40;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__eu_active__read;

      /* EuStall */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 44;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__eu_stall__read;

      /* EuFpuBothActive */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "EU Both FPU Pipes Active";
      ctr->symbol_name = "EuFpuBothActive";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 48;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__eu_fpu_both_active__read;

      /* EuThreadOccupancy */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 52;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__eu_thread_occupancy__read;

      /* VMEBusy */
      ctr = query->counters + query->n_counters;
      query->n_counters++;
      ctr->name = "VME Busy";
      ctr->symbol_name = "VMEBusy";
      ctr->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      ctr->category = "VME Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 56;
      ctr->oa_counter_read_float = sklgt3__vme_pipe__vme_busy__read;

      /* Total result size: end of the last counter registered above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "0c5058ff-fdf4-4e0d-81fb-c0310fb76525";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 9);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x21D05800 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C25 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x21C05800 },
         { .reg = 0x00009888, .val = 0x09D000A5 },
         { .reg = 0x00009888, .val = 0x11D00000 },
         { .reg = 0x00009888, .val = 0x05D00000 },
         { .reg = 0x00009888, .val = 0x09D54000 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0996C000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x19938000 },
         { .reg = 0x00009888, .val = 0x1B930068 },
         { .reg = 0x00009888, .val = 0x15948000 },
         { .reg = 0x00009888, .val = 0x1B94000C },
         { .reg = 0x00009888, .val = 0x03957500 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x17950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x07928000 },
         { .reg = 0x00009888, .val = 0x03988000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x0BC000A5 },
         { .reg = 0x00009888, .val = 0x11C00000 },
         { .reg = 0x00009888, .val = 0x05C00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900463 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt3__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt3_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "14cadcfd-fb3a-46a3-8c13-0a4f850a4b18";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt3__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt3__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the sklgt3 "PMA_Stall" ("Metric set PMA Stall") OA metric set.
 *
 * Allocates a query description from `perf`, attaches the mux and B-counter
 * register tables below, describes three common GPU counters plus the
 * StcPMAStall counter (the latter only when bit 0 of
 * perf->sys_vars.slice_mask is set) and inserts the query into
 * perf->oa_metrics_table under its GUID string.
 */
static void
sklgt3_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs published through query->config.mux_regs. */
   static const struct intel_perf_query_register_prog pma_stall_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x122D3080 },
      { .reg = 0x00009888, .val = 0x000D2000 },
      { .reg = 0x00009888, .val = 0x060D8000 },
      { .reg = 0x00009888, .val = 0x080DA000 },
      { .reg = 0x00009888, .val = 0x0A0DA000 },
      { .reg = 0x00009888, .val = 0x0C0F0800 },
      { .reg = 0x00009888, .val = 0x0E0FAA00 },
      { .reg = 0x00009888, .val = 0x100F0002 },
      { .reg = 0x00009888, .val = 0x002D0025 },
      { .reg = 0x00009888, .val = 0x062D1300 },
      { .reg = 0x00009888, .val = 0x082D16A4 },
      { .reg = 0x00009888, .val = 0x0A2D162E },
      { .reg = 0x00009888, .val = 0x102D0000 },
      { .reg = 0x00009888, .val = 0x1190003F },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x45900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   /* Register/value pairs published through query->config.b_counter_regs. */
   static const struct intel_perf_query_register_prog pma_stall_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00E00021 },
      { .reg = 0x00002774, .val = 0x0007FFF8 },
      { .reg = 0x00002778, .val = 0x07000101 },
      { .reg = 0x0000277C, .val = 0x0038FFC7 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set PMA Stall";
   query->symbol_name = "PMA_Stall";
   query->guid = "c889fd93-8dc7-4ba5-9451-de34a8b5ea3f";

   /* Counter storage: room for the (up to) four counters described below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each offset is derived from the previous
    * one, so the sections sit back to back. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* Only describe the set while its data size has not been computed yet. */
   if (query->data_size == 0) {
      query->config.n_mux_regs = ARRAY_SIZE(pma_stall_mux);
      query->config.mux_regs = pma_stall_mux;

      query->config.n_b_counter_regs = ARRAY_SIZE(pma_stall_b_counters);
      query->config.b_counter_regs = pma_stall_b_counters;

      /* GpuTime: uint64 at byte offset 0. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = sklgt3__pma__stall__gpu_time__read;

      /* GpuCoreClocks: uint64 at byte offset 8. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = sklgt3__pma__stall__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: uint64 at byte offset 16; its maximum is
       * queried from the metric's own __max helper. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 16;
      ctr->raw_max = sklgt3__pma__stall__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = sklgt3__pma__stall__avg_gpu_core_frequency__read;

      /* StcPMAStall: float at byte offset 24, registered only when bit 0
       * of the slice mask is set. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "StcPMAStall";
         ctr->name = "STC PMA stall";
         ctr->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         ctr->category = "GPU/Stencil Cache";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 24;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = sklgt3__pma__stall__stc_pma_stall__read;
      }

      /* Total result size: end of whichever counter was registered last. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describes the "AsyncCompute" OA metric set for sklgt3 and inserts it into
 * perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The set carries 21 counters. Every counter gets a read callback, its
 * display strings, type/data-type/unit tags, a raw maximum and the offset
 * recorded for its value. Only B-counter and flex register tables are
 * attached to the query config by this function.
 */
static void
sklgt3_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   /* B-counter register programming attached to this metric set. */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };

   /* Flex register programming attached to this metric set. */
   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00001000 },
      { .reg = 0x0000E658, .val = 0x00051050 },
      { .reg = 0x0000E758, .val = 0x00011010 },
      { .reg = 0x0000E45C, .val = 0x00061060 },
      { .reg = 0x0000E55C, .val = 0x00000008 },
      { .reg = 0x0000E65C, .val = 0x00222222 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "AsyncCompute";
   info->symbol_name = "AsyncCompute";
   info->guid = "9d8e6b6c-c6ec-4591-99b5-7d9983e10d64";

   /* Room for the 21 counters filled in below; n_counters tracks how many
    * slots have been handed out so far. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 21);
   info->n_counters = 0;
   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each offset is derived from the previous
    * one (1 GPU time, 1 GPU clock, 36 A, 8 B, 8 C, 2 perfcnt slots). */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is only filled in while data_size is still zero.
    * NOTE(review): info was allocated by rzalloc just above, so this
    * presumably holds on every call -- confirm rzalloc zero-fills. */
   if (!info->data_size) {
      info->config.b_counter_regs = b_counter_prog;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      info->config.flex_regs = flex_prog;
      info->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* ---- GPU-wide counters ---- */

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__gpu_time__read;
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__gpu_core_clocks__read;
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__avg_gpu_core_frequency__read;
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter here whose maximum comes from a helper call. */
      ctr->raw_max = sklgt3__async_compute__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__gpu_busy__read;
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->symbol_name = "GpuBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* ---- Per-stage thread dispatch counters ---- */

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__vs_threads__read;
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "VsThreads";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__hs_threads__read;
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "HsThreads";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__ds_threads__read;
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "DsThreads";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__gs_threads__read;
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "GsThreads";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__ps_threads__read;
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "PsThreads";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_uint64 = sklgt3__async_compute__cs_threads__read;
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "CsThreads";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* ---- FPU0 pipe activity percentages ---- */

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__fpu0_active__read;
      ctr->name = "EU FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      ctr->symbol_name = "Fpu0Active";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__vs_fpu0_active__read;
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->symbol_name = "VsFpu0Active";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__ps_fpu0_active__read;
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->symbol_name = "PsFpu0Active";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__cs_fpu0_active__read;
      ctr->name = "CS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->symbol_name = "CsFpu0Active";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 92;

      /* ---- FPU1 pipe activity percentages ---- */

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__fpu1_active__read;
      ctr->name = "EU FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      ctr->symbol_name = "Fpu1Active";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 96;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__vs_fpu1_active__read;
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->symbol_name = "VsFpu1Active";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 100;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__ps_fpu1_active__read;
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->symbol_name = "PsFpu1Active";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 104;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__cs_fpu1_active__read;
      ctr->name = "CS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->symbol_name = "CsFpu1Active";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 108;

      /* ---- Whole-EU occupancy / activity percentages ---- */

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__eu_thread_occupancy__read;
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 112;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__eu_active__read;
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->symbol_name = "EuActive";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 116;

      ctr = info->counters + info->n_counters++;
      ctr->oa_counter_read_float = sklgt3__async_compute__eu_stall__read;
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->symbol_name = "EuStall";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 120;

      /* ctr still points at the last counter added, so the total size is
       * that counter's offset plus its own size. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

/* Public entry point that registers every sklgt3 OA metric set on `perf`.
 *
 * It simply invokes each of the 21 per-set registration helpers in turn,
 * passing `perf` through unchanged. As an example of what a helper does,
 * sklgt3_register_async_compute_counter_query() builds its query description
 * and inserts it into perf->oa_metrics_table keyed by the query GUID.
 */
void
intel_oa_register_queries_sklgt3(struct intel_perf_config *perf)
{
   sklgt3_register_render_basic_counter_query(perf);
   sklgt3_register_compute_basic_counter_query(perf);
   sklgt3_register_render_pipe_profile_counter_query(perf);
   sklgt3_register_memory_reads_counter_query(perf);
   sklgt3_register_memory_writes_counter_query(perf);
   sklgt3_register_compute_extended_counter_query(perf);
   sklgt3_register_compute_l3_cache_counter_query(perf);
   sklgt3_register_hdc_and_sf_counter_query(perf);
   sklgt3_register_l3_1_counter_query(perf);
   sklgt3_register_l3_2_counter_query(perf);
   sklgt3_register_l3_3_counter_query(perf);
   sklgt3_register_rasterizer_and_pixel_backend_counter_query(perf);
   sklgt3_register_sampler_counter_query(perf);
   sklgt3_register_tdl_1_counter_query(perf);
   sklgt3_register_tdl_2_counter_query(perf);
   sklgt3_register_compute_extra_counter_query(perf);
   sklgt3_register_vme_pipe_counter_query(perf);
   sklgt3_register_gpu_busyness_counter_query(perf);
   sklgt3_register_test_oa_counter_query(perf);
   sklgt3_register_pma__stall_counter_query(perf);
   sklgt3_register_async_compute_counter_query(perf);
}


static void
sklgt4_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "95322a71-bb05-4437-bc27-f7dd7b27d136";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 51);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x16EC01E0 },
         { .reg = 0x00009888, .val = 0x176C01E0 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E03B0 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4CA400 },
         { .reg = 0x00009888, .val = 0x0C4C0002 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F5600 },
         { .reg = 0x00009888, .val = 0x100F0001 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x1ACE0230 },
         { .reg = 0x00009888, .val = 0x0AEC5300 },
         { .reg = 0x00009888, .val = 0x10EC0000 },
         { .reg = 0x00009888, .val = 0x1CEC0000 },
         { .reg = 0x00009888, .val = 0x0A9B8000 },
         { .reg = 0x00009888, .val = 0x1C9C0002 },
         { .reg = 0x00009888, .val = 0x0ACC2000 },
         { .reg = 0x00009888, .val = 0x0CCC0002 },
         { .reg = 0x00009888, .val = 0x088D8000 },
         { .reg = 0x00009888, .val = 0x0A8D8000 },
         { .reg = 0x00009888, .val = 0x0E8F1000 },
         { .reg = 0x00009888, .val = 0x108F0001 },
         { .reg = 0x00009888, .val = 0x16AC8800 },
         { .reg = 0x00009888, .val = 0x1B4E0020 },
         { .reg = 0x00009888, .val = 0x096C5300 },
         { .reg = 0x00009888, .val = 0x116C0000 },
         { .reg = 0x00009888, .val = 0x1D6C0000 },
         { .reg = 0x00009888, .val = 0x091B8000 },
         { .reg = 0x00009888, .val = 0x1B1C8000 },
         { .reg = 0x00009888, .val = 0x0B4C2000 },
         { .reg = 0x00009888, .val = 0x090D8000 },
         { .reg = 0x00009888, .val = 0x0F0F1000 },
         { .reg = 0x00009888, .val = 0x172C0800 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x5190FF30 },
         { .reg = 0x00009888, .val = 0x41900060 },
         { .reg = 0x00009888, .val = 0x55903033 },
         { .reg = 0x00009888, .val = 0x45901421 },
         { .reg = 0x00009888, .val = 0x47900803 },
         { .reg = 0x00009888, .val = 0x5790FFF1 },
         { .reg = 0x00009888, .val = 0x49900001 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x5990000F },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x5390FFFF },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "7277228f-e7f3-4743-945a-6a2049d11377";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900821 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900422 },
         { .reg = 0x00009888, .val = 0x53905555 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "463c668c-3f60-49b6-8f85-d995b635b3b2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51901110 },
         { .reg = 0x00009888, .val = 0x41900440 },
         { .reg = 0x00009888, .val = 0x55901111 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900C21 },
         { .reg = 0x00009888, .val = 0x57901411 },
         { .reg = 0x00009888, .val = 0x49900042 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900024 },
         { .reg = 0x00009888, .val = 0x59900001 },
         { .reg = 0x00009888, .val = 0x43900841 },
         { .reg = 0x00009888, .val = 0x53900411 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "3ae6e74c-72c3-4040-9bd0-7961430b8cc8";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Builds the description of the "Memory Writes Distribution metrics set"
 * OA query (symbol name "MemoryWrites") and inserts it into
 * perf->oa_metrics_table, keyed by the query's GUID string.
 *
 * The function fills in the query identity, the OA report format, the
 * accumulation buffer offsets, three register programming tables (mux,
 * B-counter and flex) and 41 counter descriptors, then derives
 * query->data_size from the last counter registered.
 *
 * perf: configuration object that is passed as the first argument to the
 *       rzalloc() of the query and whose oa_metrics_table receives the query.
 */
static void
sklgt4_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "055f256d-4052-467c-8dec-6064a4806433";
   /* 41 slots: exactly the number of counters registered further down. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts right after the previous one: one slot for GPU
    * time, one for the GPU clock, 36 for the A section, 8 for B, 8 for C and
    * 2 for perfcnt.
    * NOTE(review): 36/8/8 presumably mirror the A32 + A4 / B8 / C8 groups
    * named in the OA format above - confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query is allocated with rzalloc() at the top of this
    * function and data_size is only written at the end of this branch, so
    * presumably this condition is always true here - confirm. */
   if (!query->data_size) {
      /* Register/value pairs published as query->config.mux_regs. After the
       * first entry every pair targets register 0x00009888. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs published as query->config.b_counter_regs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs published as query->config.flex_regs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Every stanza below claims the next free slot of
       * query->counters (post-incrementing n_counters) and fills in its read
       * callback, name/description/symbol strings, category, type, data
       * type, units, raw_max and byte offset. Order matters: the slot index
       * comes from n_counters and data_size is computed from the last
       * counter registered. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max comes from a helper call
       * instead of a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Float counter: uses the oa_counter_read_float callback member. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EuActive and EuStall are two consecutive float counters placed only
       * 4 bytes apart (offsets 80 and 84); the other counters in this set
       * are spaced 8 bytes apart. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      /* The remaining counters are the GTI write counters that give this
       * metric set its name; all use the "GTI" category prefix. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      /* counter still points at the last descriptor registered above (the
       * one with the highest offset), so the total result size is that
       * offset plus the size reported for that counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query discoverable: the GUID string is the hash table key. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "Compute Metrics Extended set" (symbol ComputeExtended) OA
 * query for sklgt4.
 *
 * The function allocates a new intel_perf_query_info with `perf` as its
 * ralloc context, fills in the query identity (name, symbol name, GUID), the
 * OA report format and the accumulator section offsets, attaches the static
 * mux / B-counter / flex register programming tables, describes the 38
 * counters of the set, computes the total result size and finally inserts
 * the query into perf->oa_metrics_table keyed by its GUID.
 *
 * perf: perf configuration that receives the query; it is also passed to the
 *       one counter maximum that is computed at registration time
 *       (AvgGpuCoreFrequency).
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are used without a
 * NULL check here — presumably consistent with the rest of this generated
 * file; confirm against the generator's conventions.
 */
static void
sklgt4_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "753972d4-87cd-4460-824d-754463ac5054";
   /* Room for exactly the 38 counters described further down; the array is
    * allocated with the query itself as ralloc context. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts where the previous one ends: 1 slot for GPU time,
    * 1 for GPU clock, then 36 slots from a_offset, 8 from b_offset, 8 from
    * c_offset and 2 from perfcnt_offset, with rpstat_offset following. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor into query->counters; it is re-pointed at the next free slot
    * before every counter description below. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was allocated with rzalloc() just above and
    * data_size has not been written yet, so this guard looks like it is
    * always taken (assuming rzalloc zero-initializes) — confirm. The note
    * above about a global variable does not match the per-call allocation
    * visible in this function. */
   if (!query->data_size) {
      /* Mux register programming for this metric set, as (register, value)
       * pairs. The table is static const because query->config only keeps
       * a pointer to it. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming, same (register, value) layout. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming, same (register, value) layout. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions. Every entry claims the next slot in
       * query->counters (bumping n_counters), installs the matching read
       * callback (uint64 or float variant) and records the counter's
       * metadata. `offset` advances by 8 after a UINT64 counter and by 4
       * after a FLOAT counter. A raw_max of 0 marks a maximum that is
       * undefined or unsupported, as noted on each line. */

      /* GPU-wide counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from `perf`
       * at registration time rather than being a literal. */
      counter->raw_max = sklgt4__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      /* EU array counters: float values, read through
       * oa_counter_read_float and spaced 4 apart in `offset`. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      /* Sampler, SLM, L3 and barrier counters: uint64 values again. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      /* Subslice 0 data port message counters (uint64). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      /* Per-cache-line ratio counters: float values, 4 apart in `offset`. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      /* `counter` still points at the last counter described, so the total
       * result size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the perf config's metrics table, keyed by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "4e4392e9-8f73-457b-ab44-b49f7a0c733b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53905555 },
         { .reg = 0x00009888, .val = 0x43900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "75f56991-b149-4122-ade9-d9c0c80c733b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

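   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query only
    * once.
    *
    * NOTE(review): this comment originally spoke of a global variable that
    * only needs to be initialized once, but `query` is allocated with
    * rzalloc() at the top of this function. Presumably data_size is
    * therefore zero on entry and the guarded block below always runs --
    * confirm against the generator before relying on the guard. */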

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900001 },
         { .reg = 0x00009888, .val = 0x4B900040 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "0d4cda70-da83-466f-b61e-7a064fe6a232";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "20137ace-b110-4341-883f-1094c820093c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "d3a6b05e-46ef-44a8-96c3-9bd3e58acbe6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Helper macros private to
 * sklgt4_register_rasterizer_and_pixel_backend_counter_query().
 *
 * Each expansion takes the next slot of query->counters (bumping
 * query->n_counters), leaves `counter` pointing at it and fills in every
 * descriptive field.  Both macros expect `query` and `counter` to be in scope
 * at the expansion site and are #undef'd immediately after the function.
 */

/* Counter read through oa_counter_read_uint64 and exposed as UINT64. */
#define SKLGT4_RPB_UINT64_COUNTER(read_fn_, name_, desc_, symbol_,           \
                                  category_, type_, units_, max_, offset_)   \
   do {                                                                      \
      counter = &query->counters[query->n_counters++];                       \
      counter->oa_counter_read_uint64 = read_fn_;                            \
      counter->name = name_;                                                 \
      counter->desc = desc_;                                                 \
      counter->symbol_name = symbol_;                                        \
      counter->category = category_;                                         \
      counter->type = type_;                                                 \
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;              \
      counter->units = units_;                                               \
      counter->raw_max = max_;                                               \
      counter->offset = offset_;                                             \
   } while (0)

/* Counter read through oa_counter_read_float.  Every float counter of this
 * metric set is a RAW percentage whose raw_max is 100.0, so those fields are
 * fixed here. */
#define SKLGT4_RPB_PERCENT_COUNTER(read_fn_, name_, desc_, symbol_,          \
                                   category_, offset_)                       \
   do {                                                                      \
      counter = &query->counters[query->n_counters++];                       \
      counter->oa_counter_read_float = read_fn_;                             \
      counter->name = name_;                                                 \
      counter->desc = desc_;                                                 \
      counter->symbol_name = symbol_;                                        \
      counter->category = category_;                                         \
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;                           \
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;               \
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;                     \
      counter->raw_max = 100.0;                                              \
      counter->offset = offset_;                                             \
   } while (0)

/* Build the "RasterizerAndPixelBackend" OA metric set description and add it
 * to perf->oa_metrics_table, keyed by the set's GUID.
 *
 * The query object is allocated with `perf` as its ralloc parent and its
 * counter array (41 slots: 35 unconditional counters plus up to 6 that depend
 * on perf->sys_vars) with the query as parent.
 */
static void
sklgt4_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "4081b948-63f1-4b1a-abaa-6017cb77a63b";

   /* Counter storage; slots are handed out through n_counters below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each group starts right after the
    * previous one. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Only populate the description while data_size is still unset; it is
    * assigned as the very last step of this branch. */
   if (!query->data_size) {
      /* Register/value pairs handed to query->config as the mux program. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      query->config.mux_regs = mux_regs;

      /* Register/value pairs for the B-counter configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);
      query->config.b_counter_regs = b_counter_regs;

      /* Register/value pairs for the flex configuration. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);
      query->config.flex_regs = flex_regs;

      /* ---- GPU-wide counters ---- */

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__gpu_time__read,
         "GPU Time Elapsed",
         "Time elapsed on the GPU during the measurement. Unit: ns.",
         "GpuTime", "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_NS,
         0 /* undefined */, 0);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__gpu_core_clocks__read,
         "GPU Core Clocks",
         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         "GpuCoreClocks", "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_CYCLES,
         0 /* undefined */, 8);

      /* The only counter here whose raw_max is computed from `perf`. */
      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read,
         "AVG GPU Core Frequency",
         "Average GPU Core Frequency in the measurement. Unit: Hz.",
         "AvgGpuCoreFrequency", "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_HZ,
         sklgt4__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf), 16);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__gpu_busy__read,
         "GPU Busy",
         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
         "GpuBusy", "GPU", 24);

      /* ---- Thread dispatch counts per shader stage ---- */

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__vs_threads__read,
         "VS Threads Dispatched",
         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         "VsThreads", "EU Array/Vertex Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 32);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__hs_threads__read,
         "HS Threads Dispatched",
         "The total number of hull shader hardware threads dispatched. Unit: threads.",
         "HsThreads", "EU Array/Hull Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 40);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ds_threads__read,
         "DS Threads Dispatched",
         "The total number of domain shader hardware threads dispatched. Unit: threads.",
         "DsThreads", "EU Array/Domain Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 48);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__gs_threads__read,
         "GS Threads Dispatched",
         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         "GsThreads", "EU Array/Geometry Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 56);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ps_threads__read,
         "FS Threads Dispatched",
         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         "PsThreads", "EU Array/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 64);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__cs_threads__read,
         "CS Threads Dispatched",
         "The total number of compute shader hardware threads dispatched. Unit: threads.",
         "CsThreads", "EU Array/Compute Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 72);

      /* ---- EU activity percentages ---- */

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__eu_active__read,
         "EU Active",
         "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
         "EuActive", "EU Array", 80);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__eu_stall__read,
         "EU Stall",
         "The percentage of time in which the Execution Units were stalled. Unit: percent.",
         "EuStall", "EU Array", 84);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__eu_fpu_both_active__read,
         "EU Both FPU Pipes Active",
         "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.",
         "EuFpuBothActive", "EU Array/Pipes", 88);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__vs_fpu0_active__read,
         "VS FPU0 Pipe Active",
         "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsFpu0Active", "EU Array/Vertex Shader", 92);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__vs_fpu1_active__read,
         "VS FPU1 Pipe Active",
         "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsFpu1Active", "EU Array/Vertex Shader", 96);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__vs_send_active__read,
         "VS Send Pipe Active",
         "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsSendActive", "EU Array/Vertex Shader", 100);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ps_fpu0_active__read,
         "PS FPU0 Pipe Active",
         "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsFpu0Active", "EU Array/Pixel Shader", 104);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ps_fpu1_active__read,
         "PS FPU1 Pipe Active",
         "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsFpu1Active", "EU Array/Pixel Shader", 108);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ps_send_active__read,
         "PS Send Pipeline Active",
         "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsSendActive", "EU Array/Pixel Shader", 112);

      SKLGT4_RPB_PERCENT_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read,
         "FS Both FPU Active",
         "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.",
         "PsEuBothFpuActive", "3D Pipe/Fragment Shader", 116);

      /* ---- Pixel / sample counts ---- */

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__rasterized_pixels__read,
         "Rasterized Pixels",
         "The total number of rasterized pixels. Unit: pixels.",
         "RasterizedPixels", "3D Pipe/Rasterizer",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 120);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__hi_depth_test_fails__read,
         "Early Hi-Depth Test Fails",
         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         "HiDepthTestFails", "3D Pipe/Rasterizer/Hi-Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 128);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__early_depth_test_fails__read,
         "Early Depth Test Fails",
         "The total number of pixels dropped on early depth test. Unit: pixels.",
         "EarlyDepthTestFails", "3D Pipe/Rasterizer/Early Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 136);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__samples_killed_in_ps__read,
         "Samples Killed in FS",
         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         "SamplesKilledInPs", "3D Pipe/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 144);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read,
         "Pixels Failing Tests",
         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         "PixelsFailingPostPsTests", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 152);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__samples_written__read,
         "Samples Written",
         "The total number of samples or pixels written to all render targets. Unit: pixels.",
         "SamplesWritten", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 160);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__samples_blended__read,
         "Samples Blended",
         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         "SamplesBlended", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 168);

      /* ---- Sampler, SLM and L3 traffic ---- */

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__sampler_texels__read,
         "Sampler Texels",
         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         "SamplerTexels", "Sampler/Sampler Input",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */, 176);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__sampler_texel_misses__read,
         "Sampler Texels Misses",
         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         "SamplerTexelMisses", "Sampler/Sampler Cache",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */, 184);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__slm_bytes_read__read,
         "SLM Bytes Read",
         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         "SlmBytesRead", "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 192);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__slm_bytes_written__read,
         "SLM Bytes Written",
         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         "SlmBytesWritten", "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 200);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__shader_memory_accesses__read,
         "Shader Memory Accesses",
         "The total number of shader memory accesses to L3. Unit: messages.",
         "ShaderMemoryAccesses", "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 208);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__shader_atomics__read,
         "Shader Atomic Memory Accesses",
         "The total number of shader atomic memory accesses. Unit: messages.",
         "ShaderAtomics", "L3/Data Port/Atomics",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 216);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__l3_shader_throughput__read,
         "L3 Shader Throughput",
         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         "L3ShaderThroughput", "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 224);

      SKLGT4_RPB_UINT64_COUNTER(
         sklgt4__rasterizer_and_pixel_backend__shader_barriers__read,
         "Shader Barrier Messages",
         "The total number of shader barrier messages. Unit: messages.",
         "ShaderBarriers", "EU Array/Barrier",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 232);

      /* ---- Slice0 counters: registered only when bit 0 of the slice mask
       * is set.  All five share the same condition, so one check covers
       * them. ---- */
      if (perf->sys_vars.slice_mask & 0x1) {
         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__pixel_data0_ready__read,
            "Slice0 Post-EarlyZ Pixel Data Ready",
            "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.",
            "PixelData0Ready", "GPU/Rasterizer/Early Depth Test", 240);

         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__rasterizer0_input_available__read,
            "Slice0 Rasterizer Input Available",
            "The percentage of time in which slice0 rasterizer input is available Unit: percent.",
            "Rasterizer0InputAvailable", "GPU/Rasterizer", 244);

         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__ps_output0_available__read,
            "Slice0 PS Output Available",
            "The percentage of time in which slice0 PS output is available Unit: percent.",
            "PSOutput0Available", "GPU/3D Pipe", 248);

         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__pixel_values0_ready__read,
            "Slice0 Pixel Values Ready",
            "The percentage of time in which slice0 pixel values are ready Unit: percent.",
            "PixelValues0Ready", "GPU/3D Pipe", 252);

         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__rasterizer0_output_ready__read,
            "Slice0 Rasterizer Output Ready",
            "The percentage of time in which slice0 rasterizer output is ready Unit: percent.",
            "Rasterizer0OutputReady", "GPU/Rasterizer", 256);
      }

      /* ---- Registered only when sys_vars.query_mode is non-zero ---- */
      if (perf->sys_vars.query_mode) {
         SKLGT4_RPB_PERCENT_COUNTER(
            sklgt4__rasterizer_and_pixel_backend__gt_request_queue_full__read,
            "SQ is full",
            "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.",
            "GTRequestQueueFull", "GTI", 260);
      }

      /* `counter` now points at the last counter registered above; the
       * result size is where that counter ends. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef SKLGT4_RPB_PERCENT_COUNTER
#undef SKLGT4_RPB_UINT64_COUNTER


static void
sklgt4_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "eae6bbb1-0e2b-478d-bd9e-f10ded2178f9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build and register the "TDL_1" OA metric set for sklgt4.
 *
 * Allocates a query description using `perf` as the ralloc context, fills in
 * its identity (name, symbol name, GUID), OA report format and accumulator
 * offsets, attaches the mux / B-counter / flex register programs, and appends
 * the counter descriptions.  35 counters are always appended; six more depend
 * on bits of perf->sys_vars.subslice_mask and one on
 * perf->sys_vars.query_mode, for a maximum of 42 (the size of the counters
 * array).  The finished query is inserted into perf->oa_metrics_table keyed
 * by its GUID string.
 *
 * NOTE(review): the rzalloc()/rzalloc_array() results are used without a
 * NULL check in this function.
 */
static void
sklgt4_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "1bba4bd2-81d4-45ec-847f-bc363e17e14d";
   /* 42 slots: 35 unconditional counters + 7 conditional ones appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets...
    * With the increments below these evaluate to: gpu_time = 0,
    * gpu_clock = 1, a = 2, b = 38, c = 46, perfcnt = 54, rpstat = 56. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Starts at the first slot; re-pointed at every newly appended slot below. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function the query is allocated with rzalloc()
    * above rather than stored in a global; only the register tables below
    * are static. */

   /* data_size has not been assigned since the allocation above; presumably
    * rzalloc() zero-fills, so this branch always runs here -- confirm. */
   if (!query->data_size) {
      /* Mux register program: (register, value) pairs.  Many entries target
       * the same register (0x00009888) with different values, so the order
       * of the table is kept exactly as generated. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register program: (register, value) pairs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register program: (register, value) pairs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Always-present counters (35 of them, offsets 0..232).  Each append
       * takes the next free slot in query->counters and bumps n_counters.
       * Offsets step by 8 after UINT64 counters and by 4 after FLOAT
       * counters, except that the FLOAT GpuBusy at offset 24 is followed by
       * offset 32. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from `perf` rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* Counters gated on individual subslice_mask bits (0x1, 0x2, 0x4).
       * They are appended in increasing offset order (240..260), which is
       * not subslice order; each keeps its fixed offset even when an
       * earlier conditional counter is skipped. */
      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      /* The leading `true &&` has no effect on the condition; this counter
       * is appended only when sys_vars.query_mode is non-zero. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = sklgt4__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      /* The result size is derived from the last counter appended.  Every
       * append above uses a larger offset than the one before it, so that
       * is the counter with the highest offset among those present. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "Metric set TDL_2" OA query (symbol name "TDL_2").
 *
 * A new intel_perf_query_info is allocated with rzalloc() using perf as the
 * allocation context. The function then:
 *   - fills in the query identity, OA report format and accumulator offsets,
 *   - points the query config at the mux / B-counter / flex register tables,
 *   - appends the counter descriptors (35 unconditional ones, plus up to 7
 *     that depend on perf->sys_vars.subslice_mask and
 *     perf->sys_vars.query_mode),
 *   - derives data_size from the last counter that was appended,
 *   - inserts the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * perf: configuration the query is attached to and registered with.
 */
static void
sklgt4_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->symbol_name = "TDL_2";
   info->name = "Metric set TDL_2";
   info->guid = "a4b3106c-cfec-49f5-8e26-470470379e66";
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Room for every counter this set can expose: 35 unconditional ones plus
    * 7 that are only appended when the matching system variable allows it. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 42);
   info->n_counters = 0;

   /* Accumulation buffer layout: each section starts where the previous one
    * ends. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   /* Always refers to the most recently appended counter; it is used at the
    * end to derive the total size of the query data. */
   struct intel_perf_query_counter *ctr = info->counters;

   /* The register tables below are function-local static const data, so the
    * same tables are referenced by every query built here.
    * NOTE(review): data_size is presumably still zero at this point because
    * the query was just allocated by rzalloc() - confirm against ralloc. */
   if (info->data_size == 0) {
      static const struct intel_perf_query_register_prog tdl2_mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      info->config.n_mux_regs = ARRAY_SIZE(tdl2_mux_regs);
      info->config.mux_regs = tdl2_mux_regs;

      static const struct intel_perf_query_register_prog tdl2_b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      info->config.n_b_counter_regs = ARRAY_SIZE(tdl2_b_counter_regs);
      info->config.b_counter_regs = tdl2_b_counter_regs;

      static const struct intel_perf_query_register_prog tdl2_flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      info->config.n_flex_regs = ARRAY_SIZE(tdl2_flex_regs);
      info->config.flex_regs = tdl2_flex_regs;

      /* ---- Unconditional counters, appended in increasing offset order. ---- */

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__gpu_time__read;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__gpu_core_clocks__read;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;

      /* The only counter in this set whose maximum is computed from perf. */
      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__avg_gpu_core_frequency__read;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 16;
      ctr->raw_max = sklgt4__tdl_2__avg_gpu_core_frequency__max(perf);

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__gpu_busy__read;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 24;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__vs_threads__read;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 32;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__hs_threads__read;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 40;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__ds_threads__read;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 48;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__gs_threads__read;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 56;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__ps_threads__read;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 64;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__cs_threads__read;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 72;
      ctr->raw_max = 0 /* undefined */;

      /* From here the float counters advance the offset in 4-byte steps. */
      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__eu_active__read;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 80;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__eu_stall__read;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 84;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__eu_fpu_both_active__read;
      ctr->symbol_name = "EuFpuBothActive";
      ctr->name = "EU Both FPU Pipes Active";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 88;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__vs_fpu0_active__read;
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 92;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__vs_fpu1_active__read;
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 96;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__vs_send_active__read;
      ctr->symbol_name = "VsSendActive";
      ctr->name = "VS Send Pipe Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 100;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__ps_fpu0_active__read;
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 104;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__ps_fpu1_active__read;
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 108;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__ps_send_active__read;
      ctr->symbol_name = "PsSendActive";
      ctr->name = "PS Send Pipeline Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 112;
      ctr->raw_max = 100.0;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_float = sklgt4__tdl_2__ps_eu_both_fpu_active__read;
      ctr->symbol_name = "PsEuBothFpuActive";
      ctr->name = "FS Both FPU Active";
      ctr->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->offset = 116;
      ctr->raw_max = 100.0;

      /* Back to 64-bit counters, 8 bytes apart. */
      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__rasterized_pixels__read;
      ctr->symbol_name = "RasterizedPixels";
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 120;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__hi_depth_test_fails__read;
      ctr->symbol_name = "HiDepthTestFails";
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 128;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__early_depth_test_fails__read;
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 136;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__samples_killed_in_ps__read;
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 144;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__pixels_failing_post_ps_tests__read;
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 152;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__samples_written__read;
      ctr->symbol_name = "SamplesWritten";
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 160;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__samples_blended__read;
      ctr->symbol_name = "SamplesBlended";
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 168;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__sampler_texels__read;
      ctr->symbol_name = "SamplerTexels";
      ctr->name = "Sampler Texels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->category = "Sampler/Sampler Input";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 176;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__sampler_texel_misses__read;
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->name = "Sampler Texels Misses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->category = "Sampler/Sampler Cache";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 184;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__slm_bytes_read__read;
      ctr->symbol_name = "SlmBytesRead";
      ctr->name = "SLM Bytes Read";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->offset = 192;
      ctr->raw_max = 0 /* unsupported (varies over time) */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__slm_bytes_written__read;
      ctr->symbol_name = "SlmBytesWritten";
      ctr->name = "SLM Bytes Written";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->offset = 200;
      ctr->raw_max = 0 /* unsupported (varies over time) */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__shader_memory_accesses__read;
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->name = "Shader Memory Accesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 208;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__shader_atomics__read;
      ctr->symbol_name = "ShaderAtomics";
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->category = "L3/Data Port/Atomics";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 216;
      ctr->raw_max = 0 /* undefined */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__l3_shader_throughput__read;
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->name = "L3 Shader Throughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->offset = 224;
      ctr->raw_max = 0 /* unsupported (varies over time) */;

      ctr = &info->counters[info->n_counters++];
      ctr->oa_counter_read_uint64 = sklgt4__tdl_2__shader_barriers__read;
      ctr->symbol_name = "ShaderBarriers";
      ctr->name = "Shader Barrier Messages";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->category = "EU Array/Barrier";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->offset = 232;
      ctr->raw_max = 0 /* undefined */;

      /* ---- Thread-header counters: each one is appended only when the bit
       * of perf->sys_vars.subslice_mask it is gated on is set. Their offsets
       * are fixed whether or not the neighbouring counters are present. ---- */

      /* Gated on bit 0x2. */
      if (perf->sys_vars.subslice_mask & 0x2) {
         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header01_ready_port0__read;
         ctr->symbol_name = "ThreadHeader01ReadyPort0";
         ctr->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 240;
         ctr->raw_max = 100.0;
      }

      /* Gated on bit 0x1: port 1 is appended before port 0. */
      if (perf->sys_vars.subslice_mask & 0x1) {
         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header00_ready_port1__read;
         ctr->symbol_name = "ThreadHeader00ReadyPort1";
         ctr->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 244;
         ctr->raw_max = 100.0;

         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header00_ready_port0__read;
         ctr->symbol_name = "ThreadHeader00ReadyPort0";
         ctr->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 248;
         ctr->raw_max = 100.0;
      }

      /* Gated on bit 0x4: port 1 is appended before port 0. */
      if (perf->sys_vars.subslice_mask & 0x4) {
         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header02_ready_port1__read;
         ctr->symbol_name = "ThreadHeader02ReadyPort1";
         ctr->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 252;
         ctr->raw_max = 100.0;

         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header02_ready_port0__read;
         ctr->symbol_name = "ThreadHeader02ReadyPort0";
         ctr->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 256;
         ctr->raw_max = 100.0;
      }

      /* Gated on bit 0x2 again; kept separate so the append order is
       * unchanged. */
      if (perf->sys_vars.subslice_mask & 0x2) {
         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__thread_header01_ready_port1__read;
         ctr->symbol_name = "ThreadHeader01ReadyPort1";
         ctr->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         ctr->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 260;
         ctr->raw_max = 100.0;
      }

      /* Appended only when perf->sys_vars.query_mode is non-zero. */
      if (perf->sys_vars.query_mode) {
         ctr = &info->counters[info->n_counters++];
         ctr->oa_counter_read_float = sklgt4__tdl_2__gt_request_queue_full__read;
         ctr->symbol_name = "GTRequestQueueFull";
         ctr->name = "SQ is full";
         ctr->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         ctr->category = "GTI";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->offset = 264;
         ctr->raw_max = 100.0;
      }

      /* Total size = offset of the last appended counter plus its size. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Make the query discoverable through its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


/* Register the "Compute Metrics Extra set" (symbol "ComputeExtra") OA query.
 *
 * Allocates a new intel_perf_query_info with `perf` as the ralloc context,
 * fills in its OA report format, accumulator offsets, MUX register
 * programming and four counters (GpuTime, GpuCoreClocks,
 * AvgGpuCoreFrequency, Fpu1Active), then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * Only a MUX register table is assigned for this set; no B-counter or flex
 * register tables are set in this function.
 */
static void
sklgt4_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "a5aa857d-e8f0-4dfa-8981-ce340fa748fd";
   /* The array size (4) must match the number of counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts where the previous one ends: 1 slot for GPU time,
    * 1 for GPU clock, then 36 / 8 / 8 / 2 slots.  NOTE(review): the 36/8/8
    * spacing presumably mirrors the A32u40_A4u32_B8_C8 format name
    * (32 + 4 A, 8 B, 8 C) - confirm against the format definition. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function `query` is allocated above on every
    * call rather than being a global, so the data_size guard below
    * presumably always passes (assuming rzalloc zero-fills) - confirm. */

   if (!query->data_size) {
      /* MUX programming as (register, value) pairs.  The first entry writes
       * 0x9840; every following entry writes 0x9888.  NOTE(review): the
       * sequence is presumably order-sensitive - do not reorder or dedupe. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x129203E0 },
         { .reg = 0x00009888, .val = 0x12B203E0 },
         { .reg = 0x00009888, .val = 0x12D203E0 },
         { .reg = 0x00009888, .val = 0x131203E0 },
         { .reg = 0x00009888, .val = 0x133203E0 },
         { .reg = 0x00009888, .val = 0x135203E0 },
         { .reg = 0x00009888, .val = 0x1A4EF000 },
         { .reg = 0x00009888, .val = 0x1C4E0003 },
         { .reg = 0x00009888, .val = 0x024EC000 },
         { .reg = 0x00009888, .val = 0x044EC000 },
         { .reg = 0x00009888, .val = 0x064EC000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0C4C02A0 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C0042 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0150 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F006D },
         { .reg = 0x00009888, .val = 0x182C00A8 },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x1ACEF000 },
         { .reg = 0x00009888, .val = 0x1CCE0003 },
         { .reg = 0x00009888, .val = 0x00AF8000 },
         { .reg = 0x00009888, .val = 0x0CCC02A0 },
         { .reg = 0x00009888, .val = 0x0ACC0001 },
         { .reg = 0x00009888, .val = 0x0C8D8000 },
         { .reg = 0x00009888, .val = 0x0E8DA000 },
         { .reg = 0x00009888, .val = 0x008D8000 },
         { .reg = 0x00009888, .val = 0x028DA000 },
         { .reg = 0x00009888, .val = 0x108F0150 },
         { .reg = 0x00009888, .val = 0x0C8FB000 },
         { .reg = 0x00009888, .val = 0x0E8F0001 },
         { .reg = 0x00009888, .val = 0x18AC00A8 },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x02AD4000 },
         { .reg = 0x00009888, .val = 0x02908000 },
         { .reg = 0x00009888, .val = 0x02918000 },
         { .reg = 0x00009888, .val = 0x02921980 },
         { .reg = 0x00009888, .val = 0x00920000 },
         { .reg = 0x00009888, .val = 0x02934000 },
         { .reg = 0x00009888, .val = 0x02B04000 },
         { .reg = 0x00009888, .val = 0x02B14000 },
         { .reg = 0x00009888, .val = 0x02B20033 },
         { .reg = 0x00009888, .val = 0x00B20000 },
         { .reg = 0x00009888, .val = 0x02B31000 },
         { .reg = 0x00009888, .val = 0x00D08000 },
         { .reg = 0x00009888, .val = 0x00D18000 },
         { .reg = 0x00009888, .val = 0x00D21980 },
         { .reg = 0x00009888, .val = 0x00D34000 },
         { .reg = 0x00009888, .val = 0x072F8000 },
         { .reg = 0x00009888, .val = 0x0D4C0100 },
         { .reg = 0x00009888, .val = 0x0D0D8000 },
         { .reg = 0x00009888, .val = 0x0F0DA000 },
         { .reg = 0x00009888, .val = 0x110F01B0 },
         { .reg = 0x00009888, .val = 0x192C0080 },
         { .reg = 0x00009888, .val = 0x0F2D4000 },
         { .reg = 0x00009888, .val = 0x0F108000 },
         { .reg = 0x00009888, .val = 0x0F118000 },
         { .reg = 0x00009888, .val = 0x0F121980 },
         { .reg = 0x00009888, .val = 0x01120000 },
         { .reg = 0x00009888, .val = 0x0F134000 },
         { .reg = 0x00009888, .val = 0x0F304000 },
         { .reg = 0x00009888, .val = 0x0F314000 },
         { .reg = 0x00009888, .val = 0x0F320033 },
         { .reg = 0x00009888, .val = 0x01320000 },
         { .reg = 0x00009888, .val = 0x0F331000 },
         { .reg = 0x00009888, .val = 0x0D508000 },
         { .reg = 0x00009888, .val = 0x0D518000 },
         { .reg = 0x00009888, .val = 0x0D521980 },
         { .reg = 0x00009888, .val = 0x01520000 },
         { .reg = 0x00009888, .val = 0x0D534000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51901100 },
         { .reg = 0x00009888, .val = 0x41901000 },
         { .reg = 0x00009888, .val = 0x43901423 },
         { .reg = 0x00009888, .val = 0x53903331 },
         { .reg = 0x00009888, .val = 0x45900044 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);


      /* Counters are appended in order; each takes the next slot in
       * query->counters and bumps n_counters.  `offset` advances by 8 for
       * the uint64 counters below. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike the counters above, raw_max here is computed from `perf`
       * by a dedicated helper. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Float percentage counter, read through the float callback. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* `counter` still points at the last counter registered, so the total
       * size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "Media Vme Pipe metrics set" (symbol "VMEPipe") OA query.
 *
 * Allocates a new intel_perf_query_info with `perf` as the ralloc context,
 * fills in its OA report format, accumulator offsets, the MUX / B-counter /
 * flex register tables and ten counters, then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 */
static void
sklgt4_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "0e8d8b86-4ee7-4cdd-aaaa-58adc92cb29e";
   /* The array size (10) must match the number of counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts where the previous one ends: 1 slot for GPU time,
    * 1 for GPU clock, then 36 / 8 / 8 / 2 slots.  NOTE(review): the 36/8/8
    * spacing presumably mirrors the A32u40_A4u32_B8_C8 format name
    * (32 + 4 A, 8 B, 8 C) - confirm against the format definition. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function `query` is allocated above on every
    * call rather than being a global, so the data_size guard below
    * presumably always passes (assuming rzalloc zero-fills) - confirm. */

   if (!query->data_size) {
      /* MUX programming as (register, value) pairs.  The first entry writes
       * 0x9840; every following entry writes 0x9888.  NOTE(review): the
       * sequence is presumably order-sensitive - do not reorder or dedupe. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x149A5800 },
         { .reg = 0x00009888, .val = 0x169A00C0 },
         { .reg = 0x00009888, .val = 0x12980240 },
         { .reg = 0x00009888, .val = 0x14980002 },
         { .reg = 0x00009888, .val = 0x1A4E3FC0 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C9500 },
         { .reg = 0x00009888, .val = 0x0C4C002A },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0015 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C000A },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x04AFC000 },
         { .reg = 0x00009888, .val = 0x06AF3000 },
         { .reg = 0x00009888, .val = 0x0ACC4000 },
         { .reg = 0x00009888, .val = 0x0CCC0015 },
         { .reg = 0x00009888, .val = 0x0A8DA000 },
         { .reg = 0x00009888, .val = 0x0C8DA000 },
         { .reg = 0x00009888, .val = 0x0E8F4000 },
         { .reg = 0x00009888, .val = 0x108F0015 },
         { .reg = 0x00009888, .val = 0x16ACA000 },
         { .reg = 0x00009888, .val = 0x18AC000A },
         { .reg = 0x00009888, .val = 0x06993000 },
         { .reg = 0x00009888, .val = 0x0C9A28C1 },
         { .reg = 0x00009888, .val = 0x009A0000 },
         { .reg = 0x00009888, .val = 0x0A93F000 },
         { .reg = 0x00009888, .val = 0x0C93F000 },
         { .reg = 0x00009888, .val = 0x0A97A000 },
         { .reg = 0x00009888, .val = 0x0C97A000 },
         { .reg = 0x00009888, .val = 0x0A980977 },
         { .reg = 0x00009888, .val = 0x08980000 },
         { .reg = 0x00009888, .val = 0x04980000 },
         { .reg = 0x00009888, .val = 0x06983000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900010 },
         { .reg = 0x00009888, .val = 0x41900060 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900821 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900002 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming (registers in the 0x27xx range). */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming (two registers: 0xE458 and 0xE558). */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counters are appended in order; each takes the next slot in
       * query->counters and bumps n_counters.  In the offsets below, uint64
       * counters are spaced 8 apart and consecutive float counters 4 apart. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike the counters above, raw_max here is computed from `perf`
       * by a dedicated helper. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* The preceding float counter sits at offset 24 and this uint64 is
       * placed at 32, leaving a gap.  NOTE(review): presumably padding for
       * 8-byte alignment of the uint64 value - confirm. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      /* The remaining counters are all float percentages (raw_max 100.0). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      /* `counter` still points at the last counter registered, so the total
       * size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "Gpu Rings Busyness" (symbol "GpuBusyness") OA query.
 *
 * Allocates a new intel_perf_query_info with `perf` as the ralloc context,
 * fills in its OA report format, accumulator offsets, the MUX and B-counter
 * register tables and nine counters, then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * No flex register table is assigned for this set in this function.
 */
static void
sklgt4_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "4e5b1599-5b01-4b3d-89fa-6b26a25fe02b";
   /* The array size (9) must match the number of counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 9);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts where the previous one ends: 1 slot for GPU time,
    * 1 for GPU clock, then 36 / 8 / 8 / 2 slots.  NOTE(review): the 36/8/8
    * spacing presumably mirrors the A32u40_A4u32_B8_C8 format name
    * (32 + 4 A, 8 B, 8 C) - confirm against the format definition. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function `query` is allocated above on every
    * call rather than being a global, so the data_size guard below
    * presumably always passes (assuming rzalloc zero-fills) - confirm. */

   if (!query->data_size) {
      /* MUX programming as (register, value) pairs.  The first entry writes
       * 0x9840; every following entry writes 0x9888.  NOTE(review): the
       * sequence is presumably order-sensitive - do not reorder or dedupe. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x21D05800 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C25 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x21C05800 },
         { .reg = 0x00009888, .val = 0x09D000A5 },
         { .reg = 0x00009888, .val = 0x11D00000 },
         { .reg = 0x00009888, .val = 0x05D00000 },
         { .reg = 0x00009888, .val = 0x09D54000 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0996C000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x19938000 },
         { .reg = 0x00009888, .val = 0x1B930068 },
         { .reg = 0x00009888, .val = 0x15948000 },
         { .reg = 0x00009888, .val = 0x1B94000C },
         { .reg = 0x00009888, .val = 0x03957500 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x17950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x07928000 },
         { .reg = 0x00009888, .val = 0x03988000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x0BC000A5 },
         { .reg = 0x00009888, .val = 0x11C00000 },
         { .reg = 0x00009888, .val = 0x05C00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900463 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming (registers in the 0x27xx range). */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      /* Counters are appended in order; each takes the next slot in
       * query->counters and bumps n_counters.  In the offsets below, uint64
       * counters are spaced 8 apart and float counters 4 apart. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike the counters above, raw_max here is computed from `perf`
       * by a dedicated helper. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* The remaining counters are per-ring float percentages
       * (raw_max 100.0), all in the "GPU" category. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = sklgt4__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      /* `counter` still points at the last counter registered, so the total
       * size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
sklgt4_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "c1c2d40e-652a-482c-98fc-28bdb7c6ee44";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = sklgt4__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = sklgt4__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describes the sklgt4 "Metric set PMA Stall" (PMA_Stall) OA query and adds
 * it to perf->oa_metrics_table, keyed by the query GUID.
 *
 * Three counters are always present (GPU time, GPU core clocks, average GPU
 * core frequency); the STC PMA stall counter is added only when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
sklgt4_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced from info->config below. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x122D3080 },
      { .reg = 0x00009888, .val = 0x000D2000 },
      { .reg = 0x00009888, .val = 0x060D8000 },
      { .reg = 0x00009888, .val = 0x080DA000 },
      { .reg = 0x00009888, .val = 0x0A0DA000 },
      { .reg = 0x00009888, .val = 0x0C0F0800 },
      { .reg = 0x00009888, .val = 0x0E0FAA00 },
      { .reg = 0x00009888, .val = 0x100F0002 },
      { .reg = 0x00009888, .val = 0x002D0025 },
      { .reg = 0x00009888, .val = 0x062D1300 },
      { .reg = 0x00009888, .val = 0x082D16A4 },
      { .reg = 0x00009888, .val = 0x0A2D162E },
      { .reg = 0x00009888, .val = 0x102D0000 },
      { .reg = 0x00009888, .val = 0x1190003F },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x45900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00E00021 },
      { .reg = 0x00002774, .val = 0x0007FFF8 },
      { .reg = 0x00002778, .val = 0x07000101 },
      { .reg = 0x0000277C, .val = 0x0038FFC7 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);
   struct intel_perf_query_counter *ctr;

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Metric set PMA Stall";
   info->symbol_name = "PMA_Stall";
   info->guid = "befe9fd6-474e-4a3d-b98e-cd793715cf91";

   /* Room for the 3 unconditional counters plus the optional stall counter. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 4);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: GPU time, GPU clock, then the A, B and C
    * counter groups, followed by the perfcnt and rpstat slots.  The group
    * sizes (36/8/8) presumably mirror the A32+A4, B8 and C8 counts in the
    * OA format name.
    */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   ctr = info->counters;

   /* data_size is only written at the end of this block, so the check skips
    * the setup when the query has already been populated.
    */
   if (!info->data_size) {
      /* This set programs no flex registers; config.flex_regs is untouched. */
      info->config.mux_regs = mux_regs;
      info->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      info->config.b_counter_regs = b_counter_regs;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* GpuTime */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = sklgt4__pma__stall__gpu_time__read;

      /* GpuCoreClocks */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = sklgt4__pma__stall__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: maximum is computed from perf. */
      ctr = info->counters + info->n_counters++;
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = sklgt4__pma__stall__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = sklgt4__pma__stall__avg_gpu_core_frequency__read;

      /* StcPMAStall: only exposed when bit 0 of the slice mask is set. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         ctr = info->counters + info->n_counters++;
         ctr->name = "STC PMA stall";
         ctr->symbol_name = "StcPMAStall";
         ctr->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         ctr->category = "GPU/Stencil Cache";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 24;
         ctr->oa_counter_read_float = sklgt4__pma__stall__stc_pma_stall__read;
      }

      /* Total result size: the end of whichever counter was added last. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


/* Describes the sklgt4 "AsyncCompute" OA query and adds it to
 * perf->oa_metrics_table, keyed by the query GUID.
 *
 * The query carries 21 counters: the common GPU time / clock / frequency
 * trio, GPU busy, per-stage thread dispatch counts, FPU0/FPU1 pipe activity
 * and the EU occupancy / active / stall percentages.
 */
static void
sklgt4_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced from info->config below. */
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };
   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00001000 },
      { .reg = 0x0000E658, .val = 0x00051050 },
      { .reg = 0x0000E758, .val = 0x00011010 },
      { .reg = 0x0000E45C, .val = 0x00061060 },
      { .reg = 0x0000E55C, .val = 0x00000008 },
      { .reg = 0x0000E65C, .val = 0x00222222 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);
   struct intel_perf_query_counter *ctr;

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "AsyncCompute";
   info->symbol_name = "AsyncCompute";
   info->guid = "796044f3-8f9d-4439-ab2c-aca5b046a1ef";

   /* Room for exactly the 21 counters added below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 21);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: GPU time, GPU clock, then the A, B and C
    * counter groups, followed by the perfcnt and rpstat slots.  The group
    * sizes (36/8/8) presumably mirror the A32+A4, B8 and C8 counts in the
    * OA format name.
    */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   ctr = info->counters;

   /* data_size is only written at the end of this block, so the check skips
    * the setup when the query has already been populated.
    */
   if (!info->data_size) {
      /* This set programs no mux registers; config.mux_regs is untouched. */
      info->config.b_counter_regs = b_counter_regs;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      info->config.flex_regs = flex_regs;
      info->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* GpuTime */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__gpu_time__read;

      /* GpuCoreClocks */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: maximum is computed from perf. */
      ctr = info->counters + info->n_counters++;
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = sklgt4__async_compute__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = sklgt4__async_compute__gpu_busy__read;

      /* VsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__vs_threads__read;

      /* HsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__hs_threads__read;

      /* DsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__ds_threads__read;

      /* GsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__gs_threads__read;

      /* PsThreads (displayed as "FS") */
      ctr = info->counters + info->n_counters++;
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__ps_threads__read;

      /* CsThreads */
      ctr = info->counters + info->n_counters++;
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = sklgt4__async_compute__cs_threads__read;

      /* Fpu0Active: from here on the counters are floats, 4 bytes apart. */
      ctr = info->counters + info->n_counters++;
      ctr->name = "EU FPU0 Pipe Active";
      ctr->symbol_name = "Fpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = sklgt4__async_compute__fpu0_active__read;

      /* VsFpu0Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "VS FPU0 Pipe Active";
      ctr->symbol_name = "VsFpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = sklgt4__async_compute__vs_fpu0_active__read;

      /* PsFpu0Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "PS FPU0 Pipe Active";
      ctr->symbol_name = "PsFpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = sklgt4__async_compute__ps_fpu0_active__read;

      /* CsFpu0Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "CS FPU0 Pipe Active";
      ctr->symbol_name = "CsFpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 92;
      ctr->oa_counter_read_float = sklgt4__async_compute__cs_fpu0_active__read;

      /* Fpu1Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "EU FPU1 Pipe Active";
      ctr->symbol_name = "Fpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 96;
      ctr->oa_counter_read_float = sklgt4__async_compute__fpu1_active__read;

      /* VsFpu1Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "VS FPU1 Pipe Active";
      ctr->symbol_name = "VsFpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 100;
      ctr->oa_counter_read_float = sklgt4__async_compute__vs_fpu1_active__read;

      /* PsFpu1Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "PS FPU1 Pipe Active";
      ctr->symbol_name = "PsFpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 104;
      ctr->oa_counter_read_float = sklgt4__async_compute__ps_fpu1_active__read;

      /* CsFpu1Active */
      ctr = info->counters + info->n_counters++;
      ctr->name = "CS FPU1 Pipe Active";
      ctr->symbol_name = "CsFpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 108;
      ctr->oa_counter_read_float = sklgt4__async_compute__cs_fpu1_active__read;

      /* EuThreadOccupancy */
      ctr = info->counters + info->n_counters++;
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 112;
      ctr->oa_counter_read_float = sklgt4__async_compute__eu_thread_occupancy__read;

      /* EuActive */
      ctr = info->counters + info->n_counters++;
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 116;
      ctr->oa_counter_read_float = sklgt4__async_compute__eu_active__read;

      /* EuStall */
      ctr = info->counters + info->n_counters++;
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 120;
      ctr->oa_counter_read_float = sklgt4__async_compute__eu_stall__read;

      /* Total result size: the end of the last counter added. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

/* Registers the sklgt4 OA metric sets with perf.
 *
 * Calls each per-metric-set registration function exactly once, in the fixed
 * order listed below, passing perf through unchanged.  The registration
 * functions visible nearby (test_oa, pma__stall, async_compute) each build one
 * query description and insert it into perf->oa_metrics_table; the others are
 * presumably equivalent for their respective sets.
 */
void
intel_oa_register_queries_sklgt4(struct intel_perf_config *perf)
{
   sklgt4_register_render_basic_counter_query(perf);
   sklgt4_register_compute_basic_counter_query(perf);
   sklgt4_register_render_pipe_profile_counter_query(perf);
   sklgt4_register_memory_reads_counter_query(perf);
   sklgt4_register_memory_writes_counter_query(perf);
   sklgt4_register_compute_extended_counter_query(perf);
   sklgt4_register_compute_l3_cache_counter_query(perf);
   sklgt4_register_hdc_and_sf_counter_query(perf);
   sklgt4_register_l3_1_counter_query(perf);
   sklgt4_register_l3_2_counter_query(perf);
   sklgt4_register_l3_3_counter_query(perf);
   sklgt4_register_rasterizer_and_pixel_backend_counter_query(perf);
   sklgt4_register_sampler_counter_query(perf);
   sklgt4_register_tdl_1_counter_query(perf);
   sklgt4_register_tdl_2_counter_query(perf);
   sklgt4_register_compute_extra_counter_query(perf);
   sklgt4_register_vme_pipe_counter_query(perf);
   sklgt4_register_gpu_busyness_counter_query(perf);
   sklgt4_register_test_oa_counter_query(perf);
   sklgt4_register_pma__stall_counter_query(perf);
   sklgt4_register_async_compute_counter_query(perf);
}


static void
kblgt2_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "99c1a40e-a090-4354-86e3-4d068bb1917e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E0080 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8400 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0D2000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F6600 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162C2200 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x082D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x0833C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x08370840 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190001F },
         { .reg = 0x00009888, .val = 0x51904400 },
         { .reg = 0x00009888, .val = 0x41900020 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C21 },
         { .reg = 0x00009888, .val = 0x47900061 },
         { .reg = 0x00009888, .val = 0x57904440 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900004 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "12994724-3a5a-4303-bb3c-ba0175d2c200";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900821 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900422 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "d7a17a3a-ca71-40d2-a919-ace80d50633f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe the register
    * programming below in static const tables that are shared by every
    * instance of this query and never need to be rebuilt. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900440 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900C21 },
         { .reg = 0x00009888, .val = 0x57900400 },
         { .reg = 0x00009888, .val = 0x49900042 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900024 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900841 },
         { .reg = 0x00009888, .val = 0x53900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "57b59202-172b-477a-87de-33f85572c589";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "3addf8ef-8e9b-40f5-a448-3dbb5d5128b0";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "4af0400a-81c3-47db-a6b6-deddbd75680e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "0e22f995-79ca-4f67-83ab-e9d9772488d8";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

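   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it only needs to be described once.
    * The query itself is allocated with rzalloc() above (not kept in a
    * global variable); the data_size check below guards that one-time
    * description of the register programming and counters. */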

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
         { .reg = 0x00009888, .val = 0x43900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "fb149d66-fad2-4230-b0d7-4d689b9116d3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900040 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "f889f8cc-4c93-4ac8-b75f-551c0b9b87f7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

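   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only
    * once. The query is allocated just above (it is not a global
    * variable), so data_size is presumably still zero at this point and
    * the block below runs - TODO confirm that rzalloc zero-fills. */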

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "6d344efd-8e9e-42d4-a29e-1011c29f82c2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "e3b386ae-c195-47d5-af29-8a1afa0ae2bf";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only once.
    * The query here is freshly zero-allocated with rzalloc() above, so
    * data_size is still 0 at this point and the check below passes. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "f9954679-a055-4862-9f57-9d66e3ebf81c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "4ed5e27e-fd1a-4f11-ad8f-9374e128c697";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe the query once,
    * in the structure allocated above; the block below only runs while
    * query->data_size is still unset (zero). */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "9eaf384d-8f53-41b8-a71d-108308780fbc";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

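   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only
    * once. The query is zero-allocated by rzalloc() above rather than
    * stored in a global variable, so data_size is still 0 at this point
    * and the initialization below runs each time this function is called. */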

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "a00cbdf2-eabd-4240-9a89-86e2ac1ca1e4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "aa7a3fb9-22fb-43ff-a32d-0ab6c13bbd16";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0E0F006C },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x1190E000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C00 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00001000 },
         { .reg = 0x0000E558, .val = 0x00003002 },
         { .reg = 0x0000E658, .val = 0x00005004 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00050012 },
         { .reg = 0x0000E55C, .val = 0x00052051 },
         { .reg = 0x0000E65C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "398a4268-ef6f-4ffc-b55f-3c7b5363ce61";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x143A5800 },
         { .reg = 0x00009888, .val = 0x163A00C0 },
         { .reg = 0x00009888, .val = 0x12380240 },
         { .reg = 0x00009888, .val = 0x14380002 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C1500 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F9500 },
         { .reg = 0x00009888, .val = 0x100F002A },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x0A2DC000 },
         { .reg = 0x00009888, .val = 0x0C2DC000 },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x06393000 },
         { .reg = 0x00009888, .val = 0x0C3A28C1 },
         { .reg = 0x00009888, .val = 0x003A0000 },
         { .reg = 0x00009888, .val = 0x0A33F000 },
         { .reg = 0x00009888, .val = 0x0C33F000 },
         { .reg = 0x00009888, .val = 0x0A37A000 },
         { .reg = 0x00009888, .val = 0x0C37A000 },
         { .reg = 0x00009888, .val = 0x0A380977 },
         { .reg = 0x00009888, .val = 0x08380000 },
         { .reg = 0x00009888, .val = 0x04380000 },
         { .reg = 0x00009888, .val = 0x06383000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900800 },
         { .reg = 0x00009888, .val = 0x47901000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900844 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describes the kblgt2 "GpuBusyness" OA metric set ("Gpu Rings Busyness")
 * and publishes it in perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The query object is allocated with rzalloc() using perf as the ralloc
 * parent; its 8-entry counter array is allocated with the query as parent.
 * This set assigns mux and B-counter tables only; no flex table is set.
 */
static void
kblgt2_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   /* Register/value tables referenced from query->config.  They have static
    * storage duration, so the stored pointers outlive this call. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x13805800 },
      { .reg = 0x00009888, .val = 0x05962C00 },
      { .reg = 0x00009888, .val = 0x19950016 },
      { .reg = 0x00009888, .val = 0x19C05800 },
      { .reg = 0x00009888, .val = 0x07800035 },
      { .reg = 0x00009888, .val = 0x11800000 },
      { .reg = 0x00009888, .val = 0x1D810400 },
      { .reg = 0x00009888, .val = 0x07960025 },
      { .reg = 0x00009888, .val = 0x21960000 },
      { .reg = 0x00009888, .val = 0x0B964000 },
      { .reg = 0x00009888, .val = 0x1B930062 },
      { .reg = 0x00009888, .val = 0x17948000 },
      { .reg = 0x00009888, .val = 0x1B940008 },
      { .reg = 0x00009888, .val = 0x05950075 },
      { .reg = 0x00009888, .val = 0x1D950000 },
      { .reg = 0x00009888, .val = 0x07E54000 },
      { .reg = 0x00009888, .val = 0x09924000 },
      { .reg = 0x00009888, .val = 0x05982000 },
      { .reg = 0x00009888, .val = 0x19908000 },
      { .reg = 0x00009888, .val = 0x1B904000 },
      { .reg = 0x00009888, .val = 0x1D908000 },
      { .reg = 0x00009888, .val = 0x1F908000 },
      { .reg = 0x00009888, .val = 0x09978000 },
      { .reg = 0x00009888, .val = 0x05C08500 },
      { .reg = 0x00009888, .val = 0x25C00000 },
      { .reg = 0x00009888, .val = 0x1BC00000 },
      { .reg = 0x00009888, .val = 0x0BC54000 },
      { .reg = 0x00009888, .val = 0x11900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x43900C60 },
      { .reg = 0x00009888, .val = 0x45900040 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x10800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00078000 },
      { .reg = 0x00002774, .val = 0x00000FFF },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;
   query->guid = "6c66fe6e-2988-454a-bfae-7fca3bbcbec2";
   query->symbol_name = "GpuBusyness";
   query->name = "Gpu Rings Busyness";

   /* Report format; the metrics set id is determined at runtime, via sysfs. */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0;

   /* Counter storage, filled in one slot at a time below. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 8);

   /* Layout of the accumulation buffer: each section starts right after the
    * previous one (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* data_size is only written at the end of this block, so a zero value
    * means the description has not been filled in yet.
    * NOTE(review): rzalloc() presumably returns zeroed memory, which would
    * make this condition always true here - confirm. */
   if (query->data_size == 0) {
      struct intel_perf_query_counter *ctr;

      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = kblgt2__gpu_busyness__gpu_time__read;
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = kblgt2__gpu_busyness__gpu_core_clocks__read;
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: the only counter whose raw_max is computed. */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = kblgt2__gpu_busyness__avg_gpu_core_frequency__read;
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = kblgt2__gpu_busyness__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* RenderBusy */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = kblgt2__gpu_busyness__render_busy__read;
      ctr->name = "Render Ring Busy";
      ctr->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      ctr->symbol_name = "RenderBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* Vdbox0Busy */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = kblgt2__gpu_busyness__vdbox0_busy__read;
      ctr->name = "Vdbox0 Ring Busy";
      ctr->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      ctr->symbol_name = "Vdbox0Busy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 28;

      /* VeboxBusy */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = kblgt2__gpu_busyness__vebox_busy__read;
      ctr->name = "Vebox Ring Busy";
      ctr->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      ctr->symbol_name = "VeboxBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 32;

      /* BlitterBusy */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = kblgt2__gpu_busyness__blitter_busy__read;
      ctr->name = "Blitter Ring Busy";
      ctr->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      ctr->symbol_name = "BlitterBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 36;

      /* AnyRingBusy */
      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = kblgt2__gpu_busyness__any_ring_busy__read;
      ctr->name = "AnyRingBusy";
      ctr->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      ctr->symbol_name = "AnyRingBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 40;

      /* Total result size: end of the last counter registered above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "cd4b577b-073a-423f-a948-e198dd818c71";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set PMA Stall";
   query->symbol_name = "PMA_Stall";
   query->guid = "b49aa434-4958-4d98-9e6f-443ff27ca74d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x122D3080 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0800 },
         { .reg = 0x00009888, .val = 0x0E0FAA00 },
         { .reg = 0x00009888, .val = 0x100F0002 },
         { .reg = 0x00009888, .val = 0x002D0025 },
         { .reg = 0x00009888, .val = 0x062D1300 },
         { .reg = 0x00009888, .val = 0x082D16A4 },
         { .reg = 0x00009888, .val = 0x0A2D162E },
         { .reg = 0x00009888, .val = 0x102D0000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00E00021 },
         { .reg = 0x00002774, .val = 0x0007FFF8 },
         { .reg = 0x00002778, .val = 0x07000101 },
         { .reg = 0x0000277C, .val = 0x0038FFC7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__pma__stall__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__pma__stall__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__pma__stall__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__pma__stall__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt2__pma__stall__stc_pma_stall__read;
         counter->name = "STC PMA stall";
         counter->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         counter->symbol_name = "StcPMAStall";
         counter->category = "GPU/Stencil Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 24;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt2_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute";
   query->symbol_name = "AsyncCompute";
   query->guid = "4032137b-8dcc-4f3f-912d-c0bb14a123bc";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00001000 },
         { .reg = 0x0000E658, .val = 0x00051050 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00061060 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt2__async_compute__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt2__async_compute__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__cs_fpu0_active__read;
      counter->name = "CS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu0Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__cs_fpu1_active__read;
      counter->name = "CS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu1Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt2__async_compute__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

/* Register every KBL GT2 OA metric set with perf.
 *
 * Each kblgt2_register_<set>_counter_query() helper is invoked once with
 * perf, in the order listed below.
 */
void
intel_oa_register_queries_kblgt2(struct intel_perf_config *perf)
{
   /* Expands to a call of kblgt2_register_<set>_counter_query(perf). */
#define KBLGT2_REGISTER(set) kblgt2_register_##set##_counter_query(perf)

   KBLGT2_REGISTER(render_basic);
   KBLGT2_REGISTER(compute_basic);
   KBLGT2_REGISTER(render_pipe_profile);
   KBLGT2_REGISTER(memory_reads);
   KBLGT2_REGISTER(memory_writes);
   KBLGT2_REGISTER(compute_extended);
   KBLGT2_REGISTER(compute_l3_cache);
   KBLGT2_REGISTER(hdc_and_sf);
   KBLGT2_REGISTER(l3_1);
   KBLGT2_REGISTER(l3_2);
   KBLGT2_REGISTER(l3_3);
   KBLGT2_REGISTER(rasterizer_and_pixel_backend);
   KBLGT2_REGISTER(sampler);
   KBLGT2_REGISTER(tdl_1);
   KBLGT2_REGISTER(tdl_2);
   KBLGT2_REGISTER(compute_extra);
   KBLGT2_REGISTER(vme_pipe);
   KBLGT2_REGISTER(gpu_busyness);
   KBLGT2_REGISTER(test_oa);
   KBLGT2_REGISTER(pma__stall);
   KBLGT2_REGISTER(async_compute);

#undef KBLGT2_REGISTER
}


static void
kblgt3_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "17b4f3e0-d578-4ae3-b7a8-98d756d1e0df";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x16EC01E0 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E0380 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8400 },
         { .reg = 0x00009888, .val = 0x0C4C0002 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F6600 },
         { .reg = 0x00009888, .val = 0x100F0001 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162CA200 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x082D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x0833C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x08370840 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x1ACE0200 },
         { .reg = 0x00009888, .val = 0x0AEC5300 },
         { .reg = 0x00009888, .val = 0x10EC0000 },
         { .reg = 0x00009888, .val = 0x1CEC0000 },
         { .reg = 0x00009888, .val = 0x0A9B8000 },
         { .reg = 0x00009888, .val = 0x1C9C0002 },
         { .reg = 0x00009888, .val = 0x0CCC0002 },
         { .reg = 0x00009888, .val = 0x0A8D8000 },
         { .reg = 0x00009888, .val = 0x108F0001 },
         { .reg = 0x00009888, .val = 0x16AC8000 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51902240 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x55900242 },
         { .reg = 0x00009888, .val = 0x45900084 },
         { .reg = 0x00009888, .val = 0x47901400 },
         { .reg = 0x00009888, .val = 0x57902220 },
         { .reg = 0x00009888, .val = 0x49900C60 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900002 },
         { .reg = 0x00009888, .val = 0x43900C63 },
         { .reg = 0x00009888, .val = 0x53902222 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "a5dc0bc5-d6fa-4f3a-9979-d3248c786042";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900821 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900422 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "c7c735f3-ce58-45cf-aa04-30b183f1faff";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900440 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900C21 },
         { .reg = 0x00009888, .val = 0x57900400 },
         { .reg = 0x00009888, .val = 0x49900042 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900024 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900841 },
         { .reg = 0x00009888, .val = 0x53900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "96ec2219-040b-428a-856a-6bc03363a057";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "03372b64-4996-4d3b-aa18-790e75eeb9c2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "31b4ce5a-bd61-4c1f-bb5d-f2e731412150";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

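   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function `query` is not a global; it is
    * allocated with rzalloc() above, so the data_size check below presumably
    * always passes -- confirm whether the guard is still needed. */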

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "2ce0911a-27fc-4887-96f0-11084fa807c3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
         { .reg = 0x00009888, .val = 0x43900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "b128f45f-bfa2-4991-aadc-05be7898e398";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900040 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "f3566b55-9c13-47bf-8327-4a887af75262";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "7b21aa68-9076-4ab4-8a84-e45a06c8eac6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "71b4b9dd-ae28-48ce-9a1a-5431e1f23afd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

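   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the description only needs to be
    * filled in once: the block below runs only while query->data_size is
    * still zero.
    * NOTE(review): the query is allocated with rzalloc() above rather than
    * stored in a global variable -- confirm this guard is still needed. */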

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "52c646ca-52da-4853-877e-8645e73ed330";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "c4bb682f-677c-4875-990b-005230be87b4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "ade8521b-5e72-45d4-8c25-d9f0da774899";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "9df2815d-4e9e-417f-bde1-3788dc6e1ea1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = kblgt3__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "efc497ac-884e-4ee4-a4a8-15fba22aaf21";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x129203E0 },
         { .reg = 0x00009888, .val = 0x12B203E0 },
         { .reg = 0x00009888, .val = 0x12D203E0 },
         { .reg = 0x00009888, .val = 0x024EC000 },
         { .reg = 0x00009888, .val = 0x044EC000 },
         { .reg = 0x00009888, .val = 0x064EC000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C0042 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F006D },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x00AF8000 },
         { .reg = 0x00009888, .val = 0x0ACC0001 },
         { .reg = 0x00009888, .val = 0x008D8000 },
         { .reg = 0x00009888, .val = 0x028DA000 },
         { .reg = 0x00009888, .val = 0x0C8FB000 },
         { .reg = 0x00009888, .val = 0x0E8F0001 },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x02AD4000 },
         { .reg = 0x00009888, .val = 0x02908000 },
         { .reg = 0x00009888, .val = 0x02918000 },
         { .reg = 0x00009888, .val = 0x02921980 },
         { .reg = 0x00009888, .val = 0x00920000 },
         { .reg = 0x00009888, .val = 0x02934000 },
         { .reg = 0x00009888, .val = 0x02B04000 },
         { .reg = 0x00009888, .val = 0x02B14000 },
         { .reg = 0x00009888, .val = 0x02B20033 },
         { .reg = 0x00009888, .val = 0x00B20000 },
         { .reg = 0x00009888, .val = 0x02B31000 },
         { .reg = 0x00009888, .val = 0x00D08000 },
         { .reg = 0x00009888, .val = 0x00D18000 },
         { .reg = 0x00009888, .val = 0x00D21980 },
         { .reg = 0x00009888, .val = 0x00D34000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900002 },
         { .reg = 0x00009888, .val = 0x53900420 },
         { .reg = 0x00009888, .val = 0x459000A1 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "bfd9764d-2c5b-4c16-bfc1-89de3ca10917";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x149A5800 },
         { .reg = 0x00009888, .val = 0x169A00C0 },
         { .reg = 0x00009888, .val = 0x12980240 },
         { .reg = 0x00009888, .val = 0x14980002 },
         { .reg = 0x00009888, .val = 0x1A4E3FC0 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C9500 },
         { .reg = 0x00009888, .val = 0x0C4C002A },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0015 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C000A },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x04AFC000 },
         { .reg = 0x00009888, .val = 0x06AF3000 },
         { .reg = 0x00009888, .val = 0x0ACC4000 },
         { .reg = 0x00009888, .val = 0x0CCC0015 },
         { .reg = 0x00009888, .val = 0x0A8DA000 },
         { .reg = 0x00009888, .val = 0x0C8DA000 },
         { .reg = 0x00009888, .val = 0x0E8F4000 },
         { .reg = 0x00009888, .val = 0x108F0015 },
         { .reg = 0x00009888, .val = 0x16ACA000 },
         { .reg = 0x00009888, .val = 0x18AC000A },
         { .reg = 0x00009888, .val = 0x06993000 },
         { .reg = 0x00009888, .val = 0x0C9A28C1 },
         { .reg = 0x00009888, .val = 0x009A0000 },
         { .reg = 0x00009888, .val = 0x0A93F000 },
         { .reg = 0x00009888, .val = 0x0C93F000 },
         { .reg = 0x00009888, .val = 0x0A97A000 },
         { .reg = 0x00009888, .val = 0x0C97A000 },
         { .reg = 0x00009888, .val = 0x0A980977 },
         { .reg = 0x00009888, .val = 0x08980000 },
         { .reg = 0x00009888, .val = 0x04980000 },
         { .reg = 0x00009888, .val = 0x06983000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900040 },
         { .reg = 0x00009888, .val = 0x41900020 },
         { .reg = 0x00009888, .val = 0x55900004 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x479008A5 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900002 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "Gpu Rings Busyness" (GpuBusyness) OA query and publish it.
 *
 * A query info object is allocated with `perf` as its ralloc parent, filled
 * with the register programming tables and nine counter descriptions, and
 * finally inserted into perf->oa_metrics_table keyed by the query GUID.
 */
static void
kblgt3_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->guid = "b55ecba1-2aa9-422e-89ff-b9e30f03d447";
   query->symbol_name = "GpuBusyness";
   query->name = "Gpu Rings Busyness";
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;

   /* Storage for the nine counters appended further down. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 9);

   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Accumulation buffer layout: each section starts where the previous
    * one ends (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Tracks the most recently appended counter. */
   struct intel_perf_query_counter *cur = query->counters;

   /* NOTE(review): the query was allocated a few lines up, so data_size is
    * presumably still zero here (assuming rzalloc() zero-fills -- confirm)
    * and this branch is expected to run on every call. */
   if (query->data_size == 0) {
      static const struct intel_perf_query_register_prog mux_table[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19D05800 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C25 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x19C05800 },
         { .reg = 0x00009888, .val = 0x05D00085 },
         { .reg = 0x00009888, .val = 0x25D00000 },
         { .reg = 0x00009888, .val = 0x09D54000 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0996C000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x19938000 },
         { .reg = 0x00009888, .val = 0x1B930068 },
         { .reg = 0x00009888, .val = 0x15948000 },
         { .reg = 0x00009888, .val = 0x1B94000C },
         { .reg = 0x00009888, .val = 0x03957500 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x17950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x07928000 },
         { .reg = 0x00009888, .val = 0x03988000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x05C08500 },
         { .reg = 0x00009888, .val = 0x25C00000 },
         { .reg = 0x00009888, .val = 0x1BC00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900463 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      static const struct intel_perf_query_register_prog boolean_table[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };

      /* Hand the register programming tables to the query config. */
      query->config.n_mux_regs = ARRAY_SIZE(mux_table);
      query->config.mux_regs = mux_table;
      query->config.n_b_counter_regs = ARRAY_SIZE(boolean_table);
      query->config.b_counter_regs = boolean_table;

      /* GpuTime: uint64 result at byte offset 0. */
      cur = query->counters + query->n_counters++;
      cur->name = "GPU Time Elapsed";
      cur->symbol_name = "GpuTime";
      cur->category = "GPU";
      cur->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      cur->units = INTEL_PERF_COUNTER_UNITS_NS;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_uint64 = kblgt3__gpu_busyness__gpu_time__read;
      cur->raw_max = 0 /* undefined */;
      cur->offset = 0;

      /* GpuCoreClocks: uint64 result at byte offset 8. */
      cur = query->counters + query->n_counters++;
      cur->name = "GPU Core Clocks";
      cur->symbol_name = "GpuCoreClocks";
      cur->category = "GPU";
      cur->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      cur->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      cur->oa_counter_read_uint64 = kblgt3__gpu_busyness__gpu_core_clocks__read;
      cur->raw_max = 0 /* undefined */;
      cur->offset = 8;

      /* AvgGpuCoreFrequency: uint64 result at byte offset 16; its maximum
       * comes from the matching __max() helper. */
      cur = query->counters + query->n_counters++;
      cur->name = "AVG GPU Core Frequency";
      cur->symbol_name = "AvgGpuCoreFrequency";
      cur->category = "GPU";
      cur->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      cur->units = INTEL_PERF_COUNTER_UNITS_HZ;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      cur->oa_counter_read_uint64 = kblgt3__gpu_busyness__avg_gpu_core_frequency__read;
      cur->raw_max = kblgt3__gpu_busyness__avg_gpu_core_frequency__max(perf);
      cur->offset = 16;

      /* RenderBusy: float percentage at byte offset 24. */
      cur = query->counters + query->n_counters++;
      cur->name = "Render Ring Busy";
      cur->symbol_name = "RenderBusy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__render_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 24;

      /* Vdbox0Busy: float percentage at byte offset 28. */
      cur = query->counters + query->n_counters++;
      cur->name = "Vdbox0 Ring Busy";
      cur->symbol_name = "Vdbox0Busy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__vdbox0_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 28;

      /* Vdbox1Busy: float percentage at byte offset 32. */
      cur = query->counters + query->n_counters++;
      cur->name = "Vdbox1 Ring Busy";
      cur->symbol_name = "Vdbox1Busy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__vdbox1_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 32;

      /* VeboxBusy: float percentage at byte offset 36. */
      cur = query->counters + query->n_counters++;
      cur->name = "Vebox Ring Busy";
      cur->symbol_name = "VeboxBusy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__vebox_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 36;

      /* BlitterBusy: float percentage at byte offset 40. */
      cur = query->counters + query->n_counters++;
      cur->name = "Blitter Ring Busy";
      cur->symbol_name = "BlitterBusy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__blitter_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 40;

      /* AnyRingBusy: float percentage at byte offset 44. */
      cur = query->counters + query->n_counters++;
      cur->name = "AnyRingBusy";
      cur->symbol_name = "AnyRingBusy";
      cur->category = "GPU";
      cur->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_float = kblgt3__gpu_busyness__any_ring_busy__read;
      cur->raw_max = 100.0;
      cur->offset = 44;

      /* Total result size = end of the last counter appended above. */
      query->data_size = intel_perf_query_counter_get_size(cur) + cur->offset;
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "bb3344bf-2551-40f7-b75f-cbf29e4195f7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "Metric set PMA Stall" (PMA_Stall) OA query and publish it.
 *
 * A query info object is allocated with `perf` as its ralloc parent, filled
 * with the register programming tables and up to four counter descriptions
 * (the last one only when bit 0 of perf->sys_vars.slice_mask is set), and
 * finally inserted into perf->oa_metrics_table keyed by the query GUID.
 */
static void
kblgt3_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->guid = "85bc2e4f-2563-4388-921b-dc0dad879cf3";
   query->symbol_name = "PMA_Stall";
   query->name = "Metric set PMA Stall";
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;

   /* Storage for at most four counters appended further down. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);

   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Accumulation buffer layout: each section starts where the previous
    * one ends (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Tracks the most recently appended counter. */
   struct intel_perf_query_counter *cur = query->counters;

   /* NOTE(review): the query was allocated a few lines up, so data_size is
    * presumably still zero here (assuming rzalloc() zero-fills -- confirm)
    * and this branch is expected to run on every call. */
   if (query->data_size == 0) {
      static const struct intel_perf_query_register_prog mux_table[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x122D3080 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0800 },
         { .reg = 0x00009888, .val = 0x0E0FAA00 },
         { .reg = 0x00009888, .val = 0x100F0002 },
         { .reg = 0x00009888, .val = 0x002D0025 },
         { .reg = 0x00009888, .val = 0x062D1300 },
         { .reg = 0x00009888, .val = 0x082D16A4 },
         { .reg = 0x00009888, .val = 0x0A2D162E },
         { .reg = 0x00009888, .val = 0x102D0000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      static const struct intel_perf_query_register_prog boolean_table[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00E00021 },
         { .reg = 0x00002774, .val = 0x0007FFF8 },
         { .reg = 0x00002778, .val = 0x07000101 },
         { .reg = 0x0000277C, .val = 0x0038FFC7 },
      };

      /* Hand the register programming tables to the query config. */
      query->config.n_mux_regs = ARRAY_SIZE(mux_table);
      query->config.mux_regs = mux_table;
      query->config.n_b_counter_regs = ARRAY_SIZE(boolean_table);
      query->config.b_counter_regs = boolean_table;

      /* GpuTime: uint64 result at byte offset 0. */
      cur = query->counters + query->n_counters++;
      cur->name = "GPU Time Elapsed";
      cur->symbol_name = "GpuTime";
      cur->category = "GPU";
      cur->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      cur->units = INTEL_PERF_COUNTER_UNITS_NS;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
      cur->oa_counter_read_uint64 = kblgt3__pma__stall__gpu_time__read;
      cur->raw_max = 0 /* undefined */;
      cur->offset = 0;

      /* GpuCoreClocks: uint64 result at byte offset 8. */
      cur = query->counters + query->n_counters++;
      cur->name = "GPU Core Clocks";
      cur->symbol_name = "GpuCoreClocks";
      cur->category = "GPU";
      cur->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      cur->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      cur->oa_counter_read_uint64 = kblgt3__pma__stall__gpu_core_clocks__read;
      cur->raw_max = 0 /* undefined */;
      cur->offset = 8;

      /* AvgGpuCoreFrequency: uint64 result at byte offset 16; its maximum
       * comes from the matching __max() helper. */
      cur = query->counters + query->n_counters++;
      cur->name = "AVG GPU Core Frequency";
      cur->symbol_name = "AvgGpuCoreFrequency";
      cur->category = "GPU";
      cur->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      cur->units = INTEL_PERF_COUNTER_UNITS_HZ;
      cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      cur->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      cur->oa_counter_read_uint64 = kblgt3__pma__stall__avg_gpu_core_frequency__read;
      cur->raw_max = kblgt3__pma__stall__avg_gpu_core_frequency__max(perf);
      cur->offset = 16;

      /* StcPMAStall is only exposed when bit 0 of the slice mask is set. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         cur = query->counters + query->n_counters++;
         cur->name = "STC PMA stall";
         cur->symbol_name = "StcPMAStall";
         cur->category = "GPU/Stencil Cache";
         cur->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         cur->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         cur->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         cur->type = INTEL_PERF_COUNTER_TYPE_RAW;
         cur->oa_counter_read_float = kblgt3__pma__stall__stc_pma_stall__read;
         cur->raw_max = 100.0;
         cur->offset = 24;
      }

      /* Total result size = end of whichever counter was appended last
       * (the conditional one if present, otherwise AvgGpuCoreFrequency). */
      query->data_size = intel_perf_query_counter_get_size(cur) + cur->offset;
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
kblgt3_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute";
   query->symbol_name = "AsyncCompute";
   query->guid = "8d6d28f5-f699-4eec-89f7-216956bf2ed5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00001000 },
         { .reg = 0x0000E658, .val = 0x00051050 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00061060 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = kblgt3__async_compute__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = kblgt3__async_compute__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__cs_fpu0_active__read;
      counter->name = "CS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu0Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__cs_fpu1_active__read;
      counter->name = "CS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu1Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = kblgt3__async_compute__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

/* Register all kblgt3 OA metric sets with perf.
 *
 * Each entry below expands to a direct call of the matching
 * kblgt3_register_<set>_counter_query(perf) routine; the calls are issued
 * in the order listed.
 */
void
intel_oa_register_queries_kblgt3(struct intel_perf_config *perf)
{
#define KBLGT3_REGISTER_SET(set) kblgt3_register_##set##_counter_query(perf)

   KBLGT3_REGISTER_SET(render_basic);
   KBLGT3_REGISTER_SET(compute_basic);
   KBLGT3_REGISTER_SET(render_pipe_profile);
   KBLGT3_REGISTER_SET(memory_reads);
   KBLGT3_REGISTER_SET(memory_writes);
   KBLGT3_REGISTER_SET(compute_extended);
   KBLGT3_REGISTER_SET(compute_l3_cache);
   KBLGT3_REGISTER_SET(hdc_and_sf);
   KBLGT3_REGISTER_SET(l3_1);
   KBLGT3_REGISTER_SET(l3_2);
   KBLGT3_REGISTER_SET(l3_3);
   KBLGT3_REGISTER_SET(rasterizer_and_pixel_backend);
   KBLGT3_REGISTER_SET(sampler);
   KBLGT3_REGISTER_SET(tdl_1);
   KBLGT3_REGISTER_SET(tdl_2);
   KBLGT3_REGISTER_SET(compute_extra);
   KBLGT3_REGISTER_SET(vme_pipe);
   KBLGT3_REGISTER_SET(gpu_busyness);
   KBLGT3_REGISTER_SET(test_oa);
   KBLGT3_REGISTER_SET(pma__stall);
   KBLGT3_REGISTER_SET(async_compute);

#undef KBLGT3_REGISTER_SET
}


static void
cflgt2_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "7fa796a4-0c7a-4201-afc6-cff0b2f528a2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E0080 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8400 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0D2000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F6600 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162C2200 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x082D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x0833C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x08370840 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190001F },
         { .reg = 0x00009888, .val = 0x51904400 },
         { .reg = 0x00009888, .val = 0x41900020 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C21 },
         { .reg = 0x00009888, .val = 0x47900061 },
         { .reg = 0x00009888, .val = 0x57904440 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900004 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "a4985100-5f76-4822-8ca7-67908cb26274";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900821 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900422 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "2221e4d5-ed7b-445e-b2cc-3de1b97f4d42";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

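   /* Note: we assume the definition of a query cannot vary between
    * contexts, so the register tables below are shared static const data
    * that is described once and reused.
    *
    * NOTE(review): `query` is freshly allocated with rzalloc() above, so the
    * `!query->data_size` guard below presumably always passes; it looks like
    * a leftover from when queries were described in global variables.
    * Confirm against the generator before relying on it. */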

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900440 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900C21 },
         { .reg = 0x00009888, .val = 0x57900400 },
         { .reg = 0x00009888, .val = 0x49900042 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900024 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900841 },
         { .reg = 0x00009888, .val = 0x53900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "82096a90-e2fa-4f38-ac14-562b2496933a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "Memory Writes Distribution metrics set" OA query
 * (symbol name "MemoryWrites") on @perf.
 *
 * The function:
 *   - allocates a zeroed query descriptor with rzalloc(), using @perf as the
 *     allocation context, and a 41-entry counter array using the query as
 *     context;
 *   - points query->config at three function-local static register tables
 *     (MUX, B-counter and flex registers);
 *   - appends 41 counter descriptors, each with its read callback, strings,
 *     type/unit metadata and result offset;
 *   - derives query->data_size from the last counter appended;
 *   - inserts the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * Nothing is returned; allocation results are not checked here.
 */
static void
cflgt2_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "9f638880-02e9-4a8d-896a-7670a3bf0d35";
   /* 41 must match the number of counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is expressed relative to the previous one: 1 slot for GPU
    * time, 1 for GPU clock, then 36, 8, 8 and 2 slots before the next group.
    * NOTE(review): 36/8/8 presumably mirror the A32+A4, B8 and C8 groups in
    * the OA format name above -- confirm against the accumulator code. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query was allocated a few lines above and data_size has
    * not been written since, so this guard presumably always passes (assuming
    * rzalloc() returns zeroed memory) -- confirm whether it is still needed. */
   if (!query->data_size) {
      /* MUX programming: a list of { register, value } pairs. All entries
       * after the first write to the same register (0x9888) with different
       * values, so the order of this table is significant as written; do not
       * sort or de-duplicate it. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming: { register, value } pairs for registers in
       * the 0x2710-0x27AC range. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming: { register, value } pairs for seven
       * registers in the 0xE458-0xE758 range. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Each stanza claims the next free slot in
       * query->counters (bumping n_counters) and fills it in. The order of
       * the stanzas therefore fixes each counter's index in the array.
       *
       * `offset` advances in steps of 8, except that the float counter
       * EuStall sits at 84, directly after the float counter EuActive at 80.
       * Float counters use the oa_counter_read_float callback; all others
       * use oa_counter_read_uint64. */

      /* --- GPU time, clocks, frequency and busy percentage --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max comes from a helper
       * evaluated against @perf rather than from a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* --- Hardware threads dispatched, one counter per shader stage --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* Symbol name says "Ps" while the display strings say "FS"/"fragment
       * shader"; both are kept exactly as generated. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* --- EU activity percentages (two adjacent floats: 80 and 84) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      /* --- 3D pipe pixel/sample counts --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      /* --- Sampler texel counts --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      /* --- SLM, L3 data port and barrier counters. The THROUGHPUT-typed
       * counters below mark raw_max as unsupported rather than undefined. --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      /* --- GTI memory writes, broken down by originating unit --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      /* --- GTI writes from L3: one counter per bank (0-3), then the
       * L3 total --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      /* --- GTI ring accesses (last counter; determines data_size below) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      /* `counter` still points at the last descriptor appended, so the total
       * result size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query discoverable by GUID; the key is the same string literal
    * stored in query->guid. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "8d4ad934-7c16-43d5-845a-51067a4c8e2f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "4389cf07-1424-4963-b2d2-64fcec75406d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
         { .reg = 0x00009888, .val = 0x43900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "ac544b3a-ff78-46ea-9808-ee6fef0978b4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900040 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "a35c5867-0ab8-4d45-8fc1-eb0906f0eff5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

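   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query within a
    * global variable which only needs to be initialized once.
    *
    * NOTE(review): `query` is allocated with rzalloc() at the top of this
    * function rather than living in a global, so the wording above looks
    * stale. Presumably data_size is still zero here and the branch below
    * is always taken -- confirm against the generator.
    */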

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "bb4bcce9-2cbb-4818-9e49-67ce2c99cd25";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Helpers local to cflgt2_register_l3_3_counter_query() (undefined again
 * right after it).  Each expansion claims the next slot of query->counters,
 * bumping query->n_counters, and then fills in the descriptor fields in a
 * fixed order.  `query` and `counter` must be in scope at the call site.
 *
 * READ_MEMBER_ names the reader-callback member to set and DATA_TYPE_ the
 * matching data-type constant; the two wrappers below pair them up.
 */
#define CFLGT2_L3_3_ADD_COUNTER(READ_MEMBER_, READ_FN_, DATA_TYPE_, LABEL_,   \
                                DESC_, SYMBOL_, CATEGORY_, TYPE_, UNITS_,     \
                                RAW_MAX_, OFFSET_)                            \
   do {                                                                       \
      counter = &query->counters[query->n_counters++];                        \
      counter->READ_MEMBER_ = (READ_FN_);                                     \
      counter->name = (LABEL_);                                               \
      counter->desc = (DESC_);                                                \
      counter->symbol_name = (SYMBOL_);                                       \
      counter->category = (CATEGORY_);                                        \
      counter->type = (TYPE_);                                                \
      counter->data_type = (DATA_TYPE_);                                      \
      counter->units = (UNITS_);                                              \
      counter->raw_max = (RAW_MAX_);                                          \
      counter->offset = (OFFSET_);                                            \
   } while (0)

/* Counter whose value is produced by a uint64 reader. */
#define CFLGT2_L3_3_UINT64(READ_FN_, LABEL_, DESC_, SYMBOL_, CATEGORY_,       \
                           TYPE_, UNITS_, RAW_MAX_, OFFSET_)                  \
   CFLGT2_L3_3_ADD_COUNTER(oa_counter_read_uint64, READ_FN_,                  \
                           INTEL_PERF_COUNTER_DATA_TYPE_UINT64, LABEL_,       \
                           DESC_, SYMBOL_, CATEGORY_, TYPE_, UNITS_,          \
                           RAW_MAX_, OFFSET_)

/* Counter whose value is produced by a float reader. */
#define CFLGT2_L3_3_FLOAT(READ_FN_, LABEL_, DESC_, SYMBOL_, CATEGORY_,        \
                          TYPE_, UNITS_, RAW_MAX_, OFFSET_)                   \
   CFLGT2_L3_3_ADD_COUNTER(oa_counter_read_float, READ_FN_,                   \
                           INTEL_PERF_COUNTER_DATA_TYPE_FLOAT, LABEL_,        \
                           DESC_, SYMBOL_, CATEGORY_, TYPE_, UNITS_,          \
                           RAW_MAX_, OFFSET_)

/**
 * Build the description of the OA query "Metric set L3_3" and publish it in
 * perf->oa_metrics_table under its GUID.
 *
 * The query and its counter array (room for 38 entries) are allocated with
 * rzalloc()/rzalloc_array(), the query being parented to \p perf and the
 * array to the query.  Up to 38 counters are registered: 35 unconditionally,
 * two when bit 0 of perf->sys_vars.slice_mask is set, and one when
 * perf->sys_vars.query_mode is non-zero.
 */
static void
cflgt2_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   /* Register programming attached to query->config below. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x000091BC, .val = 0xE0500000 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x126C4E80 },
      { .reg = 0x00009888, .val = 0x146C0000 },
      { .reg = 0x00009888, .val = 0x0A633400 },
      { .reg = 0x00009888, .val = 0x044E8000 },
      { .reg = 0x00009888, .val = 0x064E8000 },
      { .reg = 0x00009888, .val = 0x084E8000 },
      { .reg = 0x00009888, .val = 0x0A4E8000 },
      { .reg = 0x00009888, .val = 0x0C4E8000 },
      { .reg = 0x00009888, .val = 0x026C3321 },
      { .reg = 0x00009888, .val = 0x046C342F },
      { .reg = 0x00009888, .val = 0x106C0000 },
      { .reg = 0x00009888, .val = 0x1A6C2000 },
      { .reg = 0x00009888, .val = 0x021BC000 },
      { .reg = 0x00009888, .val = 0x041BC000 },
      { .reg = 0x00009888, .val = 0x061B4000 },
      { .reg = 0x00009888, .val = 0x141C8000 },
      { .reg = 0x00009888, .val = 0x161C8000 },
      { .reg = 0x00009888, .val = 0x181C8000 },
      { .reg = 0x00009888, .val = 0x1A1C1800 },
      { .reg = 0x00009888, .val = 0x06604000 },
      { .reg = 0x00009888, .val = 0x0C630044 },
      { .reg = 0x00009888, .val = 0x10630000 },
      { .reg = 0x00009888, .val = 0x06630000 },
      { .reg = 0x00009888, .val = 0x084C8000 },
      { .reg = 0x00009888, .val = 0x0A4C00AA },
      { .reg = 0x00009888, .val = 0x020DA000 },
      { .reg = 0x00009888, .val = 0x040DA000 },
      { .reg = 0x00009888, .val = 0x060D2000 },
      { .reg = 0x00009888, .val = 0x0C0F4000 },
      { .reg = 0x00009888, .val = 0x0E0F0055 },
      { .reg = 0x00009888, .val = 0x042C8000 },
      { .reg = 0x00009888, .val = 0x062C8000 },
      { .reg = 0x00009888, .val = 0x082C8000 },
      { .reg = 0x00009888, .val = 0x0A2C8000 },
      { .reg = 0x00009888, .val = 0x0C2C8000 },
      { .reg = 0x00009888, .val = 0x1190F800 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x43900842 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x45900002 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00100070 },
      { .reg = 0x00002774, .val = 0x0000FFF1 },
      { .reg = 0x00002778, .val = 0x00028002 },
      { .reg = 0x0000277C, .val = 0x000087FF },
      { .reg = 0x00002780, .val = 0x00020002 },
      { .reg = 0x00002784, .val = 0x00008FFF },
      { .reg = 0x00002788, .val = 0x00008002 },
      { .reg = 0x0000278C, .val = 0x0000A7FF },
   };
   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "9e1229f8-d87f-4f44-b067-3544e17195aa";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* A data_size of zero means the counters have not been described yet.
    * NOTE(review): query was allocated just above, so this is presumably
    * always the case here (assuming rzalloc() zero-fills) -- confirm. */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* --- GPU --- */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__gpu_time__read,
                         "GPU Time Elapsed",
                         "Time elapsed on the GPU during the measurement. Unit: ns.",
                         "GpuTime", "GPU",
                         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_NS,
                         0 /* undefined */, 0);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__gpu_core_clocks__read,
                         "GPU Core Clocks",
                         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
                         "GpuCoreClocks", "GPU",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_CYCLES,
                         0 /* undefined */, 8);

      /* The only counter here whose raw_max is computed from perf. */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__avg_gpu_core_frequency__read,
                         "AVG GPU Core Frequency",
                         "Average GPU Core Frequency in the measurement. Unit: Hz.",
                         "AvgGpuCoreFrequency", "GPU",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_HZ,
                         cflgt2__l3_3__avg_gpu_core_frequency__max(perf), 16);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__gpu_busy__read,
                        "GPU Busy",
                        "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
                        "GpuBusy", "GPU",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 24);

      /* --- Threads dispatched per shader stage --- */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__vs_threads__read,
                         "VS Threads Dispatched",
                         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
                         "VsThreads", "EU Array/Vertex Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 32);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__hs_threads__read,
                         "HS Threads Dispatched",
                         "The total number of hull shader hardware threads dispatched. Unit: threads.",
                         "HsThreads", "EU Array/Hull Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 40);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__ds_threads__read,
                         "DS Threads Dispatched",
                         "The total number of domain shader hardware threads dispatched. Unit: threads.",
                         "DsThreads", "EU Array/Domain Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 48);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__gs_threads__read,
                         "GS Threads Dispatched",
                         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
                         "GsThreads", "EU Array/Geometry Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 56);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__ps_threads__read,
                         "FS Threads Dispatched",
                         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
                         "PsThreads", "EU Array/Fragment Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 64);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__cs_threads__read,
                         "CS Threads Dispatched",
                         "The total number of compute shader hardware threads dispatched. Unit: threads.",
                         "CsThreads", "EU Array/Compute Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */, 72);

      /* --- EU utilisation percentages (4 bytes apart) --- */
      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__eu_active__read,
                        "EU Active",
                        "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
                        "EuActive", "EU Array",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 80);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__eu_stall__read,
                        "EU Stall",
                        "The percentage of time in which the Execution Units were stalled. Unit: percent.",
                        "EuStall", "EU Array",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 84);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__eu_fpu_both_active__read,
                        "EU Both FPU Pipes Active",
                        "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.",
                        "EuFpuBothActive", "EU Array/Pipes",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 88);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__vs_fpu0_active__read,
                        "VS FPU0 Pipe Active",
                        "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                        "VsFpu0Active", "EU Array/Vertex Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 92);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__vs_fpu1_active__read,
                        "VS FPU1 Pipe Active",
                        "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                        "VsFpu1Active", "EU Array/Vertex Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 96);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__vs_send_active__read,
                        "VS Send Pipe Active",
                        "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.",
                        "VsSendActive", "EU Array/Vertex Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 100);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__ps_fpu0_active__read,
                        "PS FPU0 Pipe Active",
                        "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                        "PsFpu0Active", "EU Array/Pixel Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 104);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__ps_fpu1_active__read,
                        "PS FPU1 Pipe Active",
                        "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                        "PsFpu1Active", "EU Array/Pixel Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 108);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__ps_send_active__read,
                        "PS Send Pipeline Active",
                        "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.",
                        "PsSendActive", "EU Array/Pixel Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 112);

      CFLGT2_L3_3_FLOAT(cflgt2__l3_3__ps_eu_both_fpu_active__read,
                        "FS Both FPU Active",
                        "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.",
                        "PsEuBothFpuActive", "3D Pipe/Fragment Shader",
                        INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                        100.0, 116);

      /* --- 3D pipe pixel/sample events --- */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__rasterized_pixels__read,
                         "Rasterized Pixels",
                         "The total number of rasterized pixels. Unit: pixels.",
                         "RasterizedPixels", "3D Pipe/Rasterizer",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 120);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__hi_depth_test_fails__read,
                         "Early Hi-Depth Test Fails",
                         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
                         "HiDepthTestFails", "3D Pipe/Rasterizer/Hi-Depth Test",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 128);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__early_depth_test_fails__read,
                         "Early Depth Test Fails",
                         "The total number of pixels dropped on early depth test. Unit: pixels.",
                         "EarlyDepthTestFails", "3D Pipe/Rasterizer/Early Depth Test",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 136);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__samples_killed_in_ps__read,
                         "Samples Killed in FS",
                         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
                         "SamplesKilledInPs", "3D Pipe/Fragment Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 144);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__pixels_failing_post_ps_tests__read,
                         "Pixels Failing Tests",
                         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
                         "PixelsFailingPostPsTests", "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 152);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__samples_written__read,
                         "Samples Written",
                         "The total number of samples or pixels written to all render targets. Unit: pixels.",
                         "SamplesWritten", "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 160);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__samples_blended__read,
                         "Samples Blended",
                         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
                         "SamplesBlended", "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */, 168);

      /* --- Sampler --- */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__sampler_texels__read,
                         "Sampler Texels",
                         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
                         "SamplerTexels", "Sampler/Sampler Input",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
                         0 /* undefined */, 176);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__sampler_texel_misses__read,
                         "Sampler Texels Misses",
                         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
                         "SamplerTexelMisses", "Sampler/Sampler Cache",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
                         0 /* undefined */, 184);

      /* --- L3 / data port / barriers --- */
      CFLGT2_L3_3_UINT64(cflgt2__l3_3__slm_bytes_read__read,
                         "SLM Bytes Read",
                         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
                         "SlmBytesRead", "L3/Data Port/SLM",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */, 192);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__slm_bytes_written__read,
                         "SLM Bytes Written",
                         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
                         "SlmBytesWritten", "L3/Data Port/SLM",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */, 200);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__shader_memory_accesses__read,
                         "Shader Memory Accesses",
                         "The total number of shader memory accesses to L3. Unit: messages.",
                         "ShaderMemoryAccesses", "L3/Data Port",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */, 208);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__shader_atomics__read,
                         "Shader Atomic Memory Accesses",
                         "The total number of shader atomic memory accesses. Unit: messages.",
                         "ShaderAtomics", "L3/Data Port/Atomics",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */, 216);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__l3_shader_throughput__read,
                         "L3 Shader Throughput",
                         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
                         "L3ShaderThroughput", "L3/Data Port",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */, 224);

      CFLGT2_L3_3_UINT64(cflgt2__l3_3__shader_barriers__read,
                         "Shader Barrier Messages",
                         "The total number of shader barrier messages. Unit: messages.",
                         "ShaderBarriers", "EU Array/Barrier",
                         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */, 232);

      /* --- Slice 0 L3 bank 3: both gated on bit 0 of the slice mask --- */
      if (perf->sys_vars.slice_mask & 0x1) {
         CFLGT2_L3_3_FLOAT(cflgt2__l3_3__l30_bank3_stalled__read,
                           "Slice0 L3 Bank3 Stalled",
                           "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.",
                           "L30Bank3Stalled", "GTI/L3",
                           INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                           100.0, 240);

         CFLGT2_L3_3_FLOAT(cflgt2__l3_3__l30_bank3_active__read,
                           "Slice0 L3 Bank3 Active",
                           "The percentage of time in which slice0 L3 bank3 is active Unit: percent.",
                           "L30Bank3Active", "GTI/L3",
                           INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                           100.0, 244);
      }

      /* --- Registered only when sys_vars.query_mode is set --- */
      if (perf->sys_vars.query_mode) {
         CFLGT2_L3_3_FLOAT(cflgt2__l3_3__gt_request_queue_full__read,
                           "SQ is full",
                           "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.",
                           "GTRequestQueueFull", "GTI",
                           INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_PERCENT,
                           100.0, 248);
      }

      /* `counter` now refers to the last counter registered above; the
       * result size is the end of that counter's storage. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef CFLGT2_L3_3_FLOAT
#undef CFLGT2_L3_3_UINT64
#undef CFLGT2_L3_3_ADD_COUNTER


/* Registers the "RasterizerAndPixelBackend" OA metric set for cflgt2.
 *
 * A new intel_perf_query_info is rzalloc'd against `perf`, pointed at the
 * static mux / B-counter / flex register tables below, and populated with
 * its counter descriptors.  The finished query is then inserted into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * Counters whose availability depends on the system are only appended when
 * their condition holds: the Slice0 counters need bit 0 of
 * perf->sys_vars.slice_mask, and "SQ is full" needs a non-zero
 * perf->sys_vars.query_mode.
 */
static void
cflgt2_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced from query->config. */
   static const struct intel_perf_query_register_prog mux_config[] = {
      { .reg = 0x000091BC, .val = 0xE0500000 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x102F3800 },
      { .reg = 0x00009888, .val = 0x144D0500 },
      { .reg = 0x00009888, .val = 0x120D03C0 },
      { .reg = 0x00009888, .val = 0x140D03CF },
      { .reg = 0x00009888, .val = 0x0C0F0004 },
      { .reg = 0x00009888, .val = 0x0C4E4000 },
      { .reg = 0x00009888, .val = 0x042F0480 },
      { .reg = 0x00009888, .val = 0x082F0000 },
      { .reg = 0x00009888, .val = 0x022F0000 },
      { .reg = 0x00009888, .val = 0x0A4C0090 },
      { .reg = 0x00009888, .val = 0x064D0027 },
      { .reg = 0x00009888, .val = 0x004D0000 },
      { .reg = 0x00009888, .val = 0x000D0D40 },
      { .reg = 0x00009888, .val = 0x020D803F },
      { .reg = 0x00009888, .val = 0x040D8023 },
      { .reg = 0x00009888, .val = 0x100D0000 },
      { .reg = 0x00009888, .val = 0x060D2000 },
      { .reg = 0x00009888, .val = 0x020F0010 },
      { .reg = 0x00009888, .val = 0x000F0000 },
      { .reg = 0x00009888, .val = 0x0E0F0050 },
      { .reg = 0x00009888, .val = 0x0A2C8000 },
      { .reg = 0x00009888, .val = 0x0C2C8000 },
      { .reg = 0x00009888, .val = 0x1190FC00 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41901400 },
      { .reg = 0x00009888, .val = 0x43901485 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x45900001 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   static const struct intel_perf_query_register_prog b_counter_config[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00000002 },
      { .reg = 0x00002774, .val = 0x0000EFFF },
      { .reg = 0x00002778, .val = 0x00006000 },
      { .reg = 0x0000277C, .val = 0x0000F3FF },
   };

   static const struct intel_perf_query_register_prog flex_config[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->guid = "3e91908a-1c70-4004-b2ab-a10ef14f24f9";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->name = "Metric set RasterizerAndPixelBackend";
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;

   /* The metrics set id is left at 0 here; it is determined at runtime,
    * via sysfs. */
   query->oa_metrics_set_id = 0;
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Room for every counter this set can expose (41 at most). */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);

   /* Layout of the accumulation buffer: each section starts where the
    * previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;   /* 1 slot of GPU time */
   query->a_offset = query->gpu_clock_offset + 1;          /* 1 slot of GPU clock */
   query->b_offset = query->a_offset + 36;                 /* 36 A slots */
   query->c_offset = query->b_offset + 8;                  /* 8 B slots */
   query->perfcnt_offset = query->c_offset + 8;            /* 8 C slots */
   query->rpstat_offset = query->perfcnt_offset + 2;       /* 2 perfcnt slots */

   struct intel_perf_query_counter *ctr = query->counters;

   /* NOTE(review): the query was just rzalloc'd, so data_size is presumably
    * still zero here and this guard is expected to always pass — confirm
    * before simplifying. */
   if (query->data_size == 0) {
      query->config.flex_regs = flex_config;
      query->config.n_flex_regs = ARRAY_SIZE(flex_config);

      query->config.b_counter_regs = b_counter_config;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_config);

      query->config.mux_regs = mux_config;
      query->config.n_mux_regs = ARRAY_SIZE(mux_config);

      /* ---- GPU-wide counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* The only counter here whose maximum comes from a helper call. */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      ctr->raw_max = cflgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__gpu_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* ---- Per-stage thread dispatch counts ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__vs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__hs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__ds_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__gs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__ps_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__cs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* ---- EU array utilisation (percentages, 4-byte float slots) ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__eu_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 80;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__eu_stall__read;
      ctr->raw_max = 100.0;
      ctr->offset = 84;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Both FPU Pipes Active";
      ctr->symbol_name = "EuFpuBothActive";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 88;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS FPU0 Pipe Active";
      ctr->symbol_name = "VsFpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 92;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS FPU1 Pipe Active";
      ctr->symbol_name = "VsFpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 96;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Send Pipe Active";
      ctr->symbol_name = "VsSendActive";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__vs_send_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 100;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "PS FPU0 Pipe Active";
      ctr->symbol_name = "PsFpu0Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 104;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "PS FPU1 Pipe Active";
      ctr->symbol_name = "PsFpu1Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 108;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "PS Send Pipeline Active";
      ctr->symbol_name = "PsSendActive";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__ps_send_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 112;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Both FPU Active";
      ctr->symbol_name = "PsEuBothFpuActive";
      ctr->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 116;

      /* ---- Pixel pipeline event counts (back to 8-byte slots) ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Rasterized Pixels";
      ctr->symbol_name = "RasterizedPixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__rasterized_pixels__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->symbol_name = "HiDepthTestFails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Depth Test Fails";
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 136;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Killed in FS";
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 144;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Pixels Failing Tests";
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 152;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Written";
      ctr->symbol_name = "SamplesWritten";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__samples_written__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 160;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Blended";
      ctr->symbol_name = "SamplesBlended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__samples_blended__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 168;

      /* ---- Sampler ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels";
      ctr->symbol_name = "SamplerTexels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->category = "Sampler/Sampler Input";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__sampler_texels__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 176;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels Misses";
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->category = "Sampler/Sampler Cache";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 184;

      /* ---- SLM / L3 data port / barriers ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Read";
      ctr->symbol_name = "SlmBytesRead";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__slm_bytes_read__read;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 192;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Written";
      ctr->symbol_name = "SlmBytesWritten";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__slm_bytes_written__read;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 200;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Memory Accesses";
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 208;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->symbol_name = "ShaderAtomics";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->category = "L3/Data Port/Atomics";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__shader_atomics__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 216;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "L3 Shader Throughput";
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 224;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Barrier Messages";
      ctr->symbol_name = "ShaderBarriers";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->category = "EU Array/Barrier";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->oa_counter_read_uint64 = cflgt2__rasterizer_and_pixel_backend__shader_barriers__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 232;

      /* ---- Slice0 counters: all five share the same availability test,
       * so they are appended together when bit 0 of the slice mask is set. */
      if (perf->sys_vars.slice_mask & 0x1) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         ctr->symbol_name = "PixelData0Ready";
         ctr->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         ctr->category = "GPU/Rasterizer/Early Depth Test";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         ctr->raw_max = 100.0;
         ctr->offset = 240;

         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Rasterizer Input Available";
         ctr->symbol_name = "Rasterizer0InputAvailable";
         ctr->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         ctr->category = "GPU/Rasterizer";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         ctr->raw_max = 100.0;
         ctr->offset = 244;

         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 PS Output Available";
         ctr->symbol_name = "PSOutput0Available";
         ctr->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         ctr->category = "GPU/3D Pipe";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__ps_output0_available__read;
         ctr->raw_max = 100.0;
         ctr->offset = 248;

         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Pixel Values Ready";
         ctr->symbol_name = "PixelValues0Ready";
         ctr->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         ctr->category = "GPU/3D Pipe";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         ctr->raw_max = 100.0;
         ctr->offset = 252;

         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Rasterizer Output Ready";
         ctr->symbol_name = "Rasterizer0OutputReady";
         ctr->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         ctr->category = "GPU/Rasterizer";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         ctr->raw_max = 100.0;
         ctr->offset = 256;
      }

      /* ---- GTI: only exposed when query_mode is non-zero ---- */
      if (perf->sys_vars.query_mode) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "SQ is full";
         ctr->symbol_name = "GTRequestQueueFull";
         ctr->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         ctr->category = "GTI";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->oa_counter_read_float = cflgt2__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         ctr->raw_max = 100.0;
         ctr->offset = 260;
      }

      /* The result buffer ends right after whichever counter was appended
       * last. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the "Sampler" OA metric set description and register it with `perf`.
 *
 * The function allocates a query info object (with `perf` as the ralloc
 * context), fills in its identification (name, symbol name, GUID), the OA
 * report format, the accumulator offsets, three register programming tables
 * (mux, B-counter and flex) and up to 42 counter descriptors. Finally the
 * query is inserted into perf->oa_metrics_table, keyed by its GUID string.
 *
 * perf: configuration the query is attached to; its sys_vars.subslice_mask
 *       and sys_vars.query_mode decide which optional counters are added.
 */
static void
cflgt2_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "9a6ba957-7955-4a09-a8ce-083030ba3c1f";
   /* 42 slots: the 35 counters registered unconditionally below plus the 7
    * that are only registered when their sys_vars condition holds. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is derived from the previous one: 1 slot for GPU time, 1 for
    * the GPU clock, then 36, 8, 8 and 2 slots for the a/b/c/perfcnt ranges.
    * NOTE(review): 36/8/8 looks like the 32+4 A, 8 B and 8 C counters named in
    * the OA format above -- confirm against the accumulator layout. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* `counter` always points at the most recently registered descriptor; the
    * data_size computation at the end of the function relies on that. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */
   /* NOTE(review): `query` is allocated afresh above rather than being a
    * global, so data_size is presumably still zero here (assuming rzalloc
    * returns zeroed memory) and this branch is always taken -- confirm. */

   if (!query->data_size) {
      /* Register/value pairs stored as the query's mux configuration. The
       * table has static storage duration, so the pointer kept in
       * query->config stays valid after this function returns. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs stored as the query's B-counter configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs stored as the query's flex configuration. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* Counters that are always registered. Each one takes the next free
       * slot of query->counters. The `offset` values below advance by 8 after
       * a UINT64 counter and by 4 after a FLOAT counter, with UINT64 counters
       * placed on multiples of 8.
       * NOTE(review): offset is presumably the byte position of the counter's
       * value in the query result data -- confirm against the consumer. */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed from `perf`
       * at registration time rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* Optional counters: each is registered only when the tested bit of
       * perf->sys_vars.subslice_mask is set (0x1, 0x2 and 0x4 are used for
       * the counters named subslice0, subslice1 and subslice2). Their
       * `offset` values are fixed literals, so a skipped counter leaves a
       * gap in the offsets instead of shifting the counters after it. */

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      /* Registered only when perf->sys_vars.query_mode is non-zero. The
       * leading `true &&` does not affect the condition; it is presumably an
       * artifact of how the generator emits availability expressions. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      /* `counter` is whichever descriptor was registered last, so the total
       * data size ends right after that counter's value. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the table of OA metric sets, keyed by its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Build the "TDL_1" OA metric set description and publish it on \p perf.
 *
 * A zero-initialised intel_perf_query_info is allocated with rzalloc() as a
 * child of \p perf, its identification and accumulator layout fields are
 * filled in, the register programming tables (mux, B-counter, flex) are
 * attached, and up to 42 counters are appended.  The last seven counters
 * are optional: six of them depend on bits of perf->sys_vars.subslice_mask
 * and one on perf->sys_vars.query_mode.  The finished description is stored
 * in perf->oa_metrics_table under its GUID string.
 *
 * \param perf  configuration that owns the new query and receives it.
 */
static void
cflgt2_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   /* Register programming for this metric set.  The tables are constant,
    * so they live in static storage and are only referenced by the query. */
   static const struct intel_perf_query_register_prog mux_table[] = {
      { .reg = 0x000091BC, .val = 0xE0500000 },
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x12120000 },
      { .reg = 0x00009888, .val = 0x12320000 },
      { .reg = 0x00009888, .val = 0x12520000 },
      { .reg = 0x00009888, .val = 0x002F8000 },
      { .reg = 0x00009888, .val = 0x022F3000 },
      { .reg = 0x00009888, .val = 0x0A4C0015 },
      { .reg = 0x00009888, .val = 0x0C0D8000 },
      { .reg = 0x00009888, .val = 0x0E0DA000 },
      { .reg = 0x00009888, .val = 0x000D8000 },
      { .reg = 0x00009888, .val = 0x020DA000 },
      { .reg = 0x00009888, .val = 0x040DA000 },
      { .reg = 0x00009888, .val = 0x060D2000 },
      { .reg = 0x00009888, .val = 0x100F03A0 },
      { .reg = 0x00009888, .val = 0x0C0FF000 },
      { .reg = 0x00009888, .val = 0x0E0F0095 },
      { .reg = 0x00009888, .val = 0x062C8000 },
      { .reg = 0x00009888, .val = 0x082C8000 },
      { .reg = 0x00009888, .val = 0x0A2C8000 },
      { .reg = 0x00009888, .val = 0x0C2D8000 },
      { .reg = 0x00009888, .val = 0x0E2D4000 },
      { .reg = 0x00009888, .val = 0x062D4000 },
      { .reg = 0x00009888, .val = 0x02108000 },
      { .reg = 0x00009888, .val = 0x0410C000 },
      { .reg = 0x00009888, .val = 0x02118000 },
      { .reg = 0x00009888, .val = 0x0411C000 },
      { .reg = 0x00009888, .val = 0x02121880 },
      { .reg = 0x00009888, .val = 0x041219B5 },
      { .reg = 0x00009888, .val = 0x00120000 },
      { .reg = 0x00009888, .val = 0x02134000 },
      { .reg = 0x00009888, .val = 0x04135000 },
      { .reg = 0x00009888, .val = 0x0C308000 },
      { .reg = 0x00009888, .val = 0x0E304000 },
      { .reg = 0x00009888, .val = 0x06304000 },
      { .reg = 0x00009888, .val = 0x0C318000 },
      { .reg = 0x00009888, .val = 0x0E314000 },
      { .reg = 0x00009888, .val = 0x06314000 },
      { .reg = 0x00009888, .val = 0x0C321A80 },
      { .reg = 0x00009888, .val = 0x0E320033 },
      { .reg = 0x00009888, .val = 0x06320031 },
      { .reg = 0x00009888, .val = 0x00320000 },
      { .reg = 0x00009888, .val = 0x0C334000 },
      { .reg = 0x00009888, .val = 0x0E331000 },
      { .reg = 0x00009888, .val = 0x06331000 },
      { .reg = 0x00009888, .val = 0x0E508000 },
      { .reg = 0x00009888, .val = 0x00508000 },
      { .reg = 0x00009888, .val = 0x02504000 },
      { .reg = 0x00009888, .val = 0x0E518000 },
      { .reg = 0x00009888, .val = 0x00518000 },
      { .reg = 0x00009888, .val = 0x02514000 },
      { .reg = 0x00009888, .val = 0x0E521880 },
      { .reg = 0x00009888, .val = 0x00521A80 },
      { .reg = 0x00009888, .val = 0x02520033 },
      { .reg = 0x00009888, .val = 0x0E534000 },
      { .reg = 0x00009888, .val = 0x00534000 },
      { .reg = 0x00009888, .val = 0x02531000 },
      { .reg = 0x00009888, .val = 0x1190FF80 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900800 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
      { .reg = 0x00009888, .val = 0x4B900062 },
      { .reg = 0x00009888, .val = 0x59900000 },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41900C00 },
      { .reg = 0x00009888, .val = 0x43900003 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x45900040 },
   };

   static const struct intel_perf_query_register_prog b_counter_table[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x30800000 },
      { .reg = 0x00002770, .val = 0x00000002 },
      { .reg = 0x00002774, .val = 0x00007FFF },
      { .reg = 0x00002778, .val = 0x00000000 },
      { .reg = 0x0000277C, .val = 0x00009FFF },
      { .reg = 0x00002780, .val = 0x00000002 },
      { .reg = 0x00002784, .val = 0x0000EFFF },
      { .reg = 0x00002788, .val = 0x00000000 },
      { .reg = 0x0000278C, .val = 0x0000F3FF },
      { .reg = 0x00002790, .val = 0x00000002 },
      { .reg = 0x00002794, .val = 0x0000FDFF },
      { .reg = 0x00002798, .val = 0x00000000 },
      { .reg = 0x0000279C, .val = 0x0000FE7F },
   };

   static const struct intel_perf_query_register_prog flex_table[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E758, .val = 0x00015014 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
      { .reg = 0x0000E65C, .val = 0x00055054 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identification of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Metric set TDL_1";
   info->symbol_name = "TDL_1";
   info->guid = "c54e7ca3-c60c-4396-ac3d-f9899c9a6ee4";

   /* Room for every counter this set can expose; n_counters tracks how
    * many of the slots are actually populated below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 42);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each section starts right after
    * the previous one. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is only populated while data_size is still unset. */
   if (info->data_size == 0) {
      info->config.mux_regs = mux_table;
      info->config.n_mux_regs = ARRAY_SIZE(mux_table);

      info->config.b_counter_regs = b_counter_table;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_table);

      info->config.flex_regs = flex_table;
      info->config.n_flex_regs = ARRAY_SIZE(flex_table);

      /* ---- Counters that are always present ---- */

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__gpu_time__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__gpu_core_clocks__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->offset = 16;
      /* The only counter in this set whose maximum is computed at runtime. */
      ctr->raw_max = cflgt2__tdl_1__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__avg_gpu_core_frequency__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 24;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__gpu_busy__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 32;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__vs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 40;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__hs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 48;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__ds_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 56;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__gs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 64;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__ps_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 72;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__cs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 80;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__eu_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 84;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__eu_stall__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuFpuBothActive";
      ctr->name = "EU Both FPU Pipes Active";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 88;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__eu_fpu_both_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 92;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__vs_fpu0_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 96;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__vs_fpu1_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsSendActive";
      ctr->name = "VS Send Pipe Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 100;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__vs_send_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 104;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__ps_fpu0_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 108;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__ps_fpu1_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsSendActive";
      ctr->name = "PS Send Pipeline Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 112;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__ps_send_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsEuBothFpuActive";
      ctr->name = "FS Both FPU Active";
      ctr->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 116;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = cflgt2__tdl_1__ps_eu_both_fpu_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "RasterizedPixels";
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 120;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__rasterized_pixels__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "HiDepthTestFails";
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 128;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__hi_depth_test_fails__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 136;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__early_depth_test_fails__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 144;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__samples_killed_in_ps__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 152;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__pixels_failing_post_ps_tests__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SamplesWritten";
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 160;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__samples_written__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SamplesBlended";
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->offset = 168;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__samples_blended__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SamplerTexels";
      ctr->name = "Sampler Texels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->category = "Sampler/Sampler Input";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->offset = 176;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__sampler_texels__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->name = "Sampler Texels Misses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->category = "Sampler/Sampler Cache";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->offset = 184;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__sampler_texel_misses__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SlmBytesRead";
      ctr->name = "SLM Bytes Read";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->offset = 192;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__slm_bytes_read__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "SlmBytesWritten";
      ctr->name = "SLM Bytes Written";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->offset = 200;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__slm_bytes_written__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->name = "Shader Memory Accesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->offset = 208;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__shader_memory_accesses__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "ShaderAtomics";
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->category = "L3/Data Port/Atomics";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->offset = 216;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__shader_atomics__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->name = "L3 Shader Throughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->offset = 224;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__l3_shader_throughput__read;

      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "ShaderBarriers";
      ctr->name = "Shader Barrier Messages";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->category = "EU Array/Barrier";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->offset = 232;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = cflgt2__tdl_1__shader_barriers__read;

      /* ---- Counters gated on individual subslice_mask bits ---- */

      if ((perf->sys_vars.subslice_mask & 0x2) != 0) {
         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "NonPSThread01ReadyForDispatch";
         ctr->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         ctr->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 240;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read;
      }

      /* Bit 0x1 enables both the PS and the non-PS variant, in that order. */
      if ((perf->sys_vars.subslice_mask & 0x1) != 0) {
         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "PSThread00ReadyForDispatch";
         ctr->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         ctr->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 244;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__ps_thread00_ready_for_dispatch__read;

         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "NonPSThread00ReadyForDispatch";
         ctr->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         ctr->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 248;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read;
      }

      /* Bit 0x4 likewise enables a PS / non-PS pair. */
      if ((perf->sys_vars.subslice_mask & 0x4) != 0) {
         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "PSThread02ReadyForDispatch";
         ctr->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         ctr->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 252;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__ps_thread02_ready_for_dispatch__read;

         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "NonPSThread02ReadyForDispatch";
         ctr->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         ctr->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 256;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read;
      }

      if ((perf->sys_vars.subslice_mask & 0x2) != 0) {
         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "PSThread01ReadyForDispatch";
         ctr->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         ctr->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         ctr->category = "GPU/Thread Dispatcher";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 260;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__ps_thread01_ready_for_dispatch__read;
      }

      /* ---- Counter gated on sys_vars.query_mode ---- */

      if (perf->sys_vars.query_mode) {
         ctr = &info->counters[info->n_counters++];
         ctr->symbol_name = "GTRequestQueueFull";
         ctr->name = "SQ is full";
         ctr->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         ctr->category = "GTI";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 264;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = cflgt2__tdl_1__gt_request_queue_full__read;
      }

      /* ctr now points at the last counter that was registered; the result
       * buffer ends right after that counter's value. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


/* Describes the "TDL_2" OA metric set and adds it to perf->oa_metrics_table.
 *
 * Allocates a query description with rzalloc() using perf as the allocation
 * context, fills in its identity (name, symbol name, GUID), OA report format,
 * accumulator offsets and register programming tables (mux, B-counter, flex),
 * then appends up to 42 counter descriptions.  The six "Thread Header Ready"
 * counters are only added when the matching bit of
 * perf->sys_vars.subslice_mask is set, and the "GTRequestQueueFull" counter
 * only when perf->sys_vars.query_mode is non-zero.
 *
 * The finished query is inserted into the hash table under query->guid.
 * Nothing is returned and allocation results are not checked here.
 */
static void
cflgt2_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "e438cd5d-1da7-46c6-9823-4780f370d11d";
   /* 42 slots = 35 counters registered unconditionally below plus the 7
    * counters that are guarded by subslice_mask / query_mode checks. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts right after the previous one: 1 GPU time slot,
    * 1 GPU clock slot, 36 A slots, 8 B slots, 8 C slots, 2 perfcnt slots.
    * NOTE(review): the 36/8/8 split presumably mirrors the A32+A4/B8/C8
    * layout named by the OA format above -- confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query was allocated a few lines above and data_size has
    * not been written since, so this presumably always holds (assuming
    * rzalloc zero-fills) -- confirm. */
   if (!query->data_size) {
      /* Register/value pairs handed to the query config as-is; the tables
       * are static so the pointers stored in query->config stay valid after
       * this function returns. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions.  Every stanza claims the next free slot in
       * query->counters (bumping n_counters) and fills it in.  The offset
       * values are fixed literals: UINT64 counters are spaced 8 apart and
       * FLOAT counters 4 apart.  NOTE(review): offset appears to be a byte
       * position in the query's result data, since data_size is derived
       * from it at the end of this block -- confirm. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed from perf at
       * registration time rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* From here through offset 116 the counters are FLOAT percentages and
       * the offsets advance in steps of 4. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Back to UINT64 counters: offsets advance in steps of 8 again,
       * starting at 120. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      /* Conditional counters.  Each one is registered only when its guard
       * holds, but its offset is a fixed literal either way, so a skipped
       * counter leaves its offset unused rather than shifting the ones
       * after it.  Mask bits used below: 0x1 for the "Subslice0" counters,
       * 0x2 for "Subslice1", 0x4 for "Subslice2". */
      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      /* The leading "true &&" has no effect on the condition; only
       * query_mode decides whether this counter is registered.
       * NOTE(review): presumably a leftover of how the generator emits
       * availability expressions -- confirm in gen_perf.py. */
      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      /* counter still points at whichever counter was registered last
       * (which one depends on the guards above), so data_size ends right
       * after that counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "5f679fb0-909e-4c0e-b4b2-8e801f83e71b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0E0F006C },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x1190E000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C00 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00001000 },
         { .reg = 0x0000E558, .val = 0x00003002 },
         { .reg = 0x0000E658, .val = 0x00005004 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00050012 },
         { .reg = 0x0000E55C, .val = 0x00052051 },
         { .reg = 0x0000E65C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "0d09ba9a-1d1c-457d-83e2-74ac448014d6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x143A5800 },
         { .reg = 0x00009888, .val = 0x163A00C0 },
         { .reg = 0x00009888, .val = 0x12380240 },
         { .reg = 0x00009888, .val = 0x14380002 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C1500 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F9500 },
         { .reg = 0x00009888, .val = 0x100F002A },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x0A2DC000 },
         { .reg = 0x00009888, .val = 0x0C2DC000 },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x06393000 },
         { .reg = 0x00009888, .val = 0x0C3A28C1 },
         { .reg = 0x00009888, .val = 0x003A0000 },
         { .reg = 0x00009888, .val = 0x0A33F000 },
         { .reg = 0x00009888, .val = 0x0C33F000 },
         { .reg = 0x00009888, .val = 0x0A37A000 },
         { .reg = 0x00009888, .val = 0x0C37A000 },
         { .reg = 0x00009888, .val = 0x0A380977 },
         { .reg = 0x00009888, .val = 0x08380000 },
         { .reg = 0x00009888, .val = 0x04380000 },
         { .reg = 0x00009888, .val = 0x06383000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900800 },
         { .reg = 0x00009888, .val = 0x47901000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900844 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness metrics set";
   query->symbol_name = "GpuBusyness";
   query->guid = "e2f162ae-5732-4af0-8b11-69510f57094a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 8);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C00 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x19C05800 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x07960025 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x1B930062 },
         { .reg = 0x00009888, .val = 0x17948000 },
         { .reg = 0x00009888, .val = 0x1B940008 },
         { .reg = 0x00009888, .val = 0x05950075 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x05C08500 },
         { .reg = 0x00009888, .val = 0x25C00000 },
         { .reg = 0x00009888, .val = 0x1BC00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C60 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00078000 },
         { .reg = 0x00002774, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "d3c99326-e161-4e44-9745-7772bdc579c5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set PMA Stall";
   query->symbol_name = "PMA_Stall";
   query->guid = "5ccbf9fb-6bf2-456b-a749-bdff7b1aff13";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x122D3080 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0800 },
         { .reg = 0x00009888, .val = 0x0E0FAA00 },
         { .reg = 0x00009888, .val = 0x100F0002 },
         { .reg = 0x00009888, .val = 0x002D0025 },
         { .reg = 0x00009888, .val = 0x062D1300 },
         { .reg = 0x00009888, .val = 0x082D16A4 },
         { .reg = 0x00009888, .val = 0x0A2D162E },
         { .reg = 0x00009888, .val = 0x102D0000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00E00021 },
         { .reg = 0x00002774, .val = 0x0007FFF8 },
         { .reg = 0x00002778, .val = 0x07000101 },
         { .reg = 0x0000277C, .val = 0x0038FFC7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__pma__stall__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__pma__stall__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__pma__stall__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__pma__stall__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt2__pma__stall__stc_pma_stall__read;
         counter->name = "STC PMA stall";
         counter->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         counter->symbol_name = "StcPMAStall";
         counter->category = "GPU/Stencil Cache";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 24;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt2_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute metrics set";
   query->symbol_name = "AsyncCompute";
   query->guid = "9802484e-5e21-4ad3-8060-12b7f3bb81fe";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00001000 },
         { .reg = 0x0000E658, .val = 0x00051050 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00061060 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt2__async_compute__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt2__async_compute__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__cs_fpu0_active__read;
      counter->name = "CS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu0Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__cs_fpu1_active__read;
      counter->name = "CS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu1Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt2__async_compute__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_cflgt2(struct intel_perf_config *perf)
{
   cflgt2_register_render_basic_counter_query(perf);
   cflgt2_register_compute_basic_counter_query(perf);
   cflgt2_register_render_pipe_profile_counter_query(perf);
   cflgt2_register_memory_reads_counter_query(perf);
   cflgt2_register_memory_writes_counter_query(perf);
   cflgt2_register_compute_extended_counter_query(perf);
   cflgt2_register_compute_l3_cache_counter_query(perf);
   cflgt2_register_hdc_and_sf_counter_query(perf);
   cflgt2_register_l3_1_counter_query(perf);
   cflgt2_register_l3_2_counter_query(perf);
   cflgt2_register_l3_3_counter_query(perf);
   cflgt2_register_rasterizer_and_pixel_backend_counter_query(perf);
   cflgt2_register_sampler_counter_query(perf);
   cflgt2_register_tdl_1_counter_query(perf);
   cflgt2_register_tdl_2_counter_query(perf);
   cflgt2_register_compute_extra_counter_query(perf);
   cflgt2_register_vme_pipe_counter_query(perf);
   cflgt2_register_gpu_busyness_counter_query(perf);
   cflgt2_register_test_oa_counter_query(perf);
   cflgt2_register_pma__stall_counter_query(perf);
   cflgt2_register_async_compute_counter_query(perf);
}


static void
cflgt3_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "b316bcab-212f-4228-97de-af6b5a1a2ea1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C01E0 },
         { .reg = 0x00009888, .val = 0x12170280 },
         { .reg = 0x00009888, .val = 0x12370280 },
         { .reg = 0x00009888, .val = 0x16EC01E0 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x1A4E0380 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x0A1B4000 },
         { .reg = 0x00009888, .val = 0x1C1C0001 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x042F1000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8400 },
         { .reg = 0x00009888, .val = 0x0C4C0002 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F6600 },
         { .reg = 0x00009888, .val = 0x100F0001 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x162CA200 },
         { .reg = 0x00009888, .val = 0x062D8000 },
         { .reg = 0x00009888, .val = 0x082D8000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x08133000 },
         { .reg = 0x00009888, .val = 0x00170020 },
         { .reg = 0x00009888, .val = 0x08170021 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x0633C000 },
         { .reg = 0x00009888, .val = 0x0833C000 },
         { .reg = 0x00009888, .val = 0x06370800 },
         { .reg = 0x00009888, .val = 0x08370840 },
         { .reg = 0x00009888, .val = 0x10370000 },
         { .reg = 0x00009888, .val = 0x1ACE0200 },
         { .reg = 0x00009888, .val = 0x0AEC5300 },
         { .reg = 0x00009888, .val = 0x10EC0000 },
         { .reg = 0x00009888, .val = 0x1CEC0000 },
         { .reg = 0x00009888, .val = 0x0A9B8000 },
         { .reg = 0x00009888, .val = 0x1C9C0002 },
         { .reg = 0x00009888, .val = 0x0CCC0002 },
         { .reg = 0x00009888, .val = 0x0A8D8000 },
         { .reg = 0x00009888, .val = 0x108F0001 },
         { .reg = 0x00009888, .val = 0x16AC8000 },
         { .reg = 0x00009888, .val = 0x0D933031 },
         { .reg = 0x00009888, .val = 0x0F933E3F },
         { .reg = 0x00009888, .val = 0x01933D00 },
         { .reg = 0x00009888, .val = 0x0393073C },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1D930000 },
         { .reg = 0x00009888, .val = 0x19930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D908000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190003F },
         { .reg = 0x00009888, .val = 0x51902240 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x55900242 },
         { .reg = 0x00009888, .val = 0x45900084 },
         { .reg = 0x00009888, .val = 0x47901400 },
         { .reg = 0x00009888, .val = 0x57902220 },
         { .reg = 0x00009888, .val = 0x49900C60 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900002 },
         { .reg = 0x00009888, .val = 0x43900C63 },
         { .reg = 0x00009888, .val = 0x53902222 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "d2eec5b1-4dd8-4170-ae83-9531abd0692f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x1A4E0820 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x064F0900 },
         { .reg = 0x00009888, .val = 0x084F0032 },
         { .reg = 0x00009888, .val = 0x0A4F1891 },
         { .reg = 0x00009888, .val = 0x0C4F0E00 },
         { .reg = 0x00009888, .val = 0x0E4F003C },
         { .reg = 0x00009888, .val = 0x004F0D80 },
         { .reg = 0x00009888, .val = 0x024F003B },
         { .reg = 0x00009888, .val = 0x006C0002 },
         { .reg = 0x00009888, .val = 0x086C0100 },
         { .reg = 0x00009888, .val = 0x0C6C000C },
         { .reg = 0x00009888, .val = 0x0E6C0B00 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x081B8000 },
         { .reg = 0x00009888, .val = 0x0C1B4000 },
         { .reg = 0x00009888, .val = 0x0E1B8000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1C8000 },
         { .reg = 0x00009888, .val = 0x1C1C0024 },
         { .reg = 0x00009888, .val = 0x065B8000 },
         { .reg = 0x00009888, .val = 0x085B4000 },
         { .reg = 0x00009888, .val = 0x0A5BC000 },
         { .reg = 0x00009888, .val = 0x0C5B8000 },
         { .reg = 0x00009888, .val = 0x0E5B4000 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025B4000 },
         { .reg = 0x00009888, .val = 0x1A5C6000 },
         { .reg = 0x00009888, .val = 0x1C5C001B },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2000 },
         { .reg = 0x00009888, .val = 0x0C4C0208 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2CC000 },
         { .reg = 0x00009888, .val = 0x162CFB00 },
         { .reg = 0x00009888, .val = 0x182C00BE },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x19900157 },
         { .reg = 0x00009888, .val = 0x1B900158 },
         { .reg = 0x00009888, .val = 0x1D900105 },
         { .reg = 0x00009888, .val = 0x1F900103 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x11900FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900821 },
         { .reg = 0x00009888, .val = 0x47900802 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900802 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900002 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900422 },
         { .reg = 0x00009888, .val = 0x53904444 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "9875e050-b1bc-45e6-a6ab-665594601df9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C0E001F },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x10116800 },
         { .reg = 0x00009888, .val = 0x178A03E0 },
         { .reg = 0x00009888, .val = 0x11824C00 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x13840020 },
         { .reg = 0x00009888, .val = 0x11850019 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x01870C40 },
         { .reg = 0x00009888, .val = 0x17880000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x0A4C0040 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020E5400 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x080F0040 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0040 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06110012 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x01898000 },
         { .reg = 0x00009888, .val = 0x0D890100 },
         { .reg = 0x00009888, .val = 0x03898000 },
         { .reg = 0x00009888, .val = 0x09808000 },
         { .reg = 0x00009888, .val = 0x0B808000 },
         { .reg = 0x00009888, .val = 0x0380C000 },
         { .reg = 0x00009888, .val = 0x0F8A0075 },
         { .reg = 0x00009888, .val = 0x1D8A0000 },
         { .reg = 0x00009888, .val = 0x118A8000 },
         { .reg = 0x00009888, .val = 0x1B8A4000 },
         { .reg = 0x00009888, .val = 0x138A8000 },
         { .reg = 0x00009888, .val = 0x1D81A000 },
         { .reg = 0x00009888, .val = 0x15818000 },
         { .reg = 0x00009888, .val = 0x17818000 },
         { .reg = 0x00009888, .val = 0x0B820030 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x0D824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x05824000 },
         { .reg = 0x00009888, .val = 0x0D830003 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x03838000 },
         { .reg = 0x00009888, .val = 0x07838000 },
         { .reg = 0x00009888, .val = 0x0B840980 },
         { .reg = 0x00009888, .val = 0x03844D80 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x09848000 },
         { .reg = 0x00009888, .val = 0x09850080 },
         { .reg = 0x00009888, .val = 0x03850003 },
         { .reg = 0x00009888, .val = 0x01850000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x09870032 },
         { .reg = 0x00009888, .val = 0x01888052 },
         { .reg = 0x00009888, .val = 0x11880000 },
         { .reg = 0x00009888, .val = 0x09884000 },
         { .reg = 0x00009888, .val = 0x1B931001 },
         { .reg = 0x00009888, .val = 0x1D930001 },
         { .reg = 0x00009888, .val = 0x19934000 },
         { .reg = 0x00009888, .val = 0x1B958000 },
         { .reg = 0x00009888, .val = 0x1D950094 },
         { .reg = 0x00009888, .val = 0x19958000 },
         { .reg = 0x00009888, .val = 0x09E58000 },
         { .reg = 0x00009888, .val = 0x0BE58000 },
         { .reg = 0x00009888, .val = 0x03E5C000 },
         { .reg = 0x00009888, .val = 0x0592C000 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D924000 },
         { .reg = 0x00009888, .val = 0x0F924000 },
         { .reg = 0x00009888, .val = 0x11928000 },
         { .reg = 0x00009888, .val = 0x1392C000 },
         { .reg = 0x00009888, .val = 0x09924000 },
         { .reg = 0x00009888, .val = 0x01985000 },
         { .reg = 0x00009888, .val = 0x07988000 },
         { .reg = 0x00009888, .val = 0x09981000 },
         { .reg = 0x00009888, .val = 0x0B982000 },
         { .reg = 0x00009888, .val = 0x0D982000 },
         { .reg = 0x00009888, .val = 0x0F989000 },
         { .reg = 0x00009888, .val = 0x05982000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1190C080 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900440 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900C21 },
         { .reg = 0x00009888, .val = 0x57900400 },
         { .reg = 0x00009888, .val = 0x49900042 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900024 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900841 },
         { .reg = 0x00009888, .val = 0x53900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "49c65f34-e625-4ca4-86b7-88693e624d4c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F900064 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900150 },
         { .reg = 0x00009888, .val = 0x21900151 },
         { .reg = 0x00009888, .val = 0x23900152 },
         { .reg = 0x00009888, .val = 0x25900153 },
         { .reg = 0x00009888, .val = 0x27900154 },
         { .reg = 0x00009888, .val = 0x29900155 },
         { .reg = 0x00009888, .val = 0x2B900156 },
         { .reg = 0x00009888, .val = 0x2D900157 },
         { .reg = 0x00009888, .val = 0x2F90015F },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "45c9e8ee-2998-4d83-88e8-9cb7e03287bf";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810C00 },
         { .reg = 0x00009888, .val = 0x1381001A },
         { .reg = 0x00009888, .val = 0x37906800 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x03811300 },
         { .reg = 0x00009888, .val = 0x05811B12 },
         { .reg = 0x00009888, .val = 0x0781001A },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x17810000 },
         { .reg = 0x00009888, .val = 0x19810000 },
         { .reg = 0x00009888, .val = 0x1B810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930055 },
         { .reg = 0x00009888, .val = 0x03E58000 },
         { .reg = 0x00009888, .val = 0x05E5C000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x13900160 },
         { .reg = 0x00009888, .val = 0x21900161 },
         { .reg = 0x00009888, .val = 0x23900162 },
         { .reg = 0x00009888, .val = 0x25900163 },
         { .reg = 0x00009888, .val = 0x27900164 },
         { .reg = 0x00009888, .val = 0x29900165 },
         { .reg = 0x00009888, .val = 0x2B900166 },
         { .reg = 0x00009888, .val = 0x2D900167 },
         { .reg = 0x00009888, .val = 0x2F900150 },
         { .reg = 0x00009888, .val = 0x31900105 },
         { .reg = 0x00009888, .val = 0x15900103 },
         { .reg = 0x00009888, .val = 0x17900101 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C60 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900C00 },
         { .reg = 0x00009888, .val = 0x47900C63 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900C63 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900063 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended metrics set";
   query->symbol_name = "ComputeExtended";
   query->guid = "bac415ce-d7a2-4f8d-9b16-834deba7330e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

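   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe each query only
    * once. The query itself is allocated per call with rzalloc() above
    * (presumably zero-filled, so data_size is expected to be 0 here --
    * TODO confirm), while the register tables below are static const
    * data shared by every call. */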

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x106C00E0 },
         { .reg = 0x00009888, .val = 0x141C8160 },
         { .reg = 0x00009888, .val = 0x161C8015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4EAAA0 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0E6C0B01 },
         { .reg = 0x00009888, .val = 0x006C0200 },
         { .reg = 0x00009888, .val = 0x026C000C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x001C0041 },
         { .reg = 0x00009888, .val = 0x061C4200 },
         { .reg = 0x00009888, .val = 0x081C4443 },
         { .reg = 0x00009888, .val = 0x0A1C4645 },
         { .reg = 0x00009888, .val = 0x0C1C7647 },
         { .reg = 0x00009888, .val = 0x041C7357 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x1A1C0000 },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4CAA2A },
         { .reg = 0x00009888, .val = 0x0C4C02AA },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x000DA000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x0C0F5400 },
         { .reg = 0x00009888, .val = 0x0E0F5515 },
         { .reg = 0x00009888, .val = 0x100F0155 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x11907FFF },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900040 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900802 },
         { .reg = 0x00009888, .val = 0x47900842 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900842 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x43900800 },
         { .reg = 0x00009888, .val = 0x53900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache metrics set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "a8cfca44-0e74-4338-9e57-3daad98957dd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we assume the definition of a query cannot vary between
    * contexts, so it is OK to describe its register configuration in
    * static const tables that only need to be initialized once. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C0760 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900003 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x184E8000 },
         { .reg = 0x00009888, .val = 0x1A4E8020 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x186C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x101C8000 },
         { .reg = 0x00009888, .val = 0x1A1CE000 },
         { .reg = 0x00009888, .val = 0x1C1C0030 },
         { .reg = 0x00009888, .val = 0x004C8000 },
         { .reg = 0x00009888, .val = 0x0A4C2A00 },
         { .reg = 0x00009888, .val = 0x0C4C0280 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F1500 },
         { .reg = 0x00009888, .val = 0x100F0140 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162C0A00 },
         { .reg = 0x00009888, .val = 0x182C00A0 },
         { .reg = 0x00009888, .val = 0x03933300 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x1B930000 },
         { .reg = 0x00009888, .val = 0x1D900157 },
         { .reg = 0x00009888, .val = 0x1F900158 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1190030F },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x53904444 },
         { .reg = 0x00009888, .val = 0x43900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "c4bee67c-0de6-4bc2-9900-5388dcc8adca";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x106C0232 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x004F1880 },
         { .reg = 0x00009888, .val = 0x024F08BB },
         { .reg = 0x00009888, .val = 0x044F001B },
         { .reg = 0x00009888, .val = 0x046C0100 },
         { .reg = 0x00009888, .val = 0x066C000B },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x041B8000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x005B8000 },
         { .reg = 0x00009888, .val = 0x025BC000 },
         { .reg = 0x00009888, .val = 0x045B4000 },
         { .reg = 0x00009888, .val = 0x125C8000 },
         { .reg = 0x00009888, .val = 0x145C8000 },
         { .reg = 0x00009888, .val = 0x165C8000 },
         { .reg = 0x00009888, .val = 0x185C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00A0 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x022CC000 },
         { .reg = 0x00009888, .val = 0x042CC000 },
         { .reg = 0x00009888, .val = 0x062CC000 },
         { .reg = 0x00009888, .val = 0x082CC000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x09830000 },
         { .reg = 0x00009888, .val = 0x07830000 },
         { .reg = 0x00009888, .val = 0x1D950080 },
         { .reg = 0x00009888, .val = 0x13928000 },
         { .reg = 0x00009888, .val = 0x0F988000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900040 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss2)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "2f77a650-54a6-4777-98dc-e49000c13ceb";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

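   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the query only needs to be described
    * once; the setup below is skipped when data_size is already non-zero.
    * NOTE(review): this comment used to refer to a global variable, but the
    * query is allocated with rzalloc() above -- confirm whether the
    * data_size guard can still be false here. */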

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C7B40 },
         { .reg = 0x00009888, .val = 0x166C0020 },
         { .reg = 0x00009888, .val = 0x0A603444 },
         { .reg = 0x00009888, .val = 0x0A613400 },
         { .reg = 0x00009888, .val = 0x1A4EA800 },
         { .reg = 0x00009888, .val = 0x1C4E0002 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1E6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0800 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x0E1BC000 },
         { .reg = 0x00009888, .val = 0x001B8000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x1C1C003C },
         { .reg = 0x00009888, .val = 0x121C8000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x10600000 },
         { .reg = 0x00009888, .val = 0x04600000 },
         { .reg = 0x00009888, .val = 0x0C610044 },
         { .reg = 0x00009888, .val = 0x10610000 },
         { .reg = 0x00009888, .val = 0x06610000 },
         { .reg = 0x00009888, .val = 0x0C4C02A8 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0154 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x182C00AA },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190FFC0 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900021 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900400 },
         { .reg = 0x00009888, .val = 0x43900421 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_2";
   query->symbol_name = "L3_2";
   query->guid = "61cf6b31-9ddd-4deb-bae5-7339dd1f63b3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C02E0 },
         { .reg = 0x00009888, .val = 0x146C0001 },
         { .reg = 0x00009888, .val = 0x0A623400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x064F4000 },
         { .reg = 0x00009888, .val = 0x026C3324 },
         { .reg = 0x00009888, .val = 0x046C3422 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C0800 },
         { .reg = 0x00009888, .val = 0x065B4000 },
         { .reg = 0x00009888, .val = 0x1A5C1000 },
         { .reg = 0x00009888, .val = 0x06614000 },
         { .reg = 0x00009888, .val = 0x0C620044 },
         { .reg = 0x00009888, .val = 0x10620000 },
         { .reg = 0x00009888, .val = 0x06620000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C002A },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2CC000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_2__l30_bank2_stalled__read;
         counter->name = "Slice0 L3 Bank2 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank2Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_2__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_3";
   query->symbol_name = "L3_3";
   query->guid = "1421ea8b-a4c9-4383-922a-60ba96c42f24";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

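   /* Note: we assume the definition of a query cannot vary between
    * contexts, so it is fine to describe the query once and only
    * initialize it a single time.
    *
    * NOTE(review): this comment originally justified the guard below by
    * saying the query lives in a global variable, but `query` here is
    * allocated with rzalloc() at the top of this function -- confirm
    * whether the `!query->data_size` check is still required. */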

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x126C4E80 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x0A633400 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E8000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x026C3321 },
         { .reg = 0x00009888, .val = 0x046C342F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1A6C2000 },
         { .reg = 0x00009888, .val = 0x021BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x061B4000 },
         { .reg = 0x00009888, .val = 0x141C8000 },
         { .reg = 0x00009888, .val = 0x161C8000 },
         { .reg = 0x00009888, .val = 0x181C8000 },
         { .reg = 0x00009888, .val = 0x1A1C1800 },
         { .reg = 0x00009888, .val = 0x06604000 },
         { .reg = 0x00009888, .val = 0x0C630044 },
         { .reg = 0x00009888, .val = 0x10630000 },
         { .reg = 0x00009888, .val = 0x06630000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C00AA },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E0F0055 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190F800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900002 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00028002 },
         { .reg = 0x0000277C, .val = 0x000087FF },
         { .reg = 0x00002780, .val = 0x00020002 },
         { .reg = 0x00002784, .val = 0x00008FFF },
         { .reg = 0x00002788, .val = 0x00008002 },
         { .reg = 0x0000278C, .val = 0x0000A7FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__l3_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_3__l30_bank3_stalled__read;
         counter->name = "Slice0 L3 Bank3 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank3Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_3__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__l3_3__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "6e1f0a90-c65f-4da9-86d0-c79b3c0063f6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102F3800 },
         { .reg = 0x00009888, .val = 0x144D0500 },
         { .reg = 0x00009888, .val = 0x120D03C0 },
         { .reg = 0x00009888, .val = 0x140D03CF },
         { .reg = 0x00009888, .val = 0x0C0F0004 },
         { .reg = 0x00009888, .val = 0x0C4E4000 },
         { .reg = 0x00009888, .val = 0x042F0480 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x022F0000 },
         { .reg = 0x00009888, .val = 0x0A4C0090 },
         { .reg = 0x00009888, .val = 0x064D0027 },
         { .reg = 0x00009888, .val = 0x004D0000 },
         { .reg = 0x00009888, .val = 0x000D0D40 },
         { .reg = 0x00009888, .val = 0x020D803F },
         { .reg = 0x00009888, .val = 0x040D8023 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x020F0010 },
         { .reg = 0x00009888, .val = 0x000F0000 },
         { .reg = 0x00009888, .val = 0x0E0F0050 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41901400 },
         { .reg = 0x00009888, .val = 0x43901485 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "7cb05eab-0857-4189-93bc-09326d298637";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x14152C00 },
         { .reg = 0x00009888, .val = 0x16150005 },
         { .reg = 0x00009888, .val = 0x121600A0 },
         { .reg = 0x00009888, .val = 0x14352C00 },
         { .reg = 0x00009888, .val = 0x16350005 },
         { .reg = 0x00009888, .val = 0x123600A0 },
         { .reg = 0x00009888, .val = 0x14552C00 },
         { .reg = 0x00009888, .val = 0x16550005 },
         { .reg = 0x00009888, .val = 0x125600A0 },
         { .reg = 0x00009888, .val = 0x062F6000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x0C4C0050 },
         { .reg = 0x00009888, .val = 0x0A4C0010 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F0350 },
         { .reg = 0x00009888, .val = 0x0C0FB000 },
         { .reg = 0x00009888, .val = 0x0E0F00DA },
         { .reg = 0x00009888, .val = 0x182C0028 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x022DC000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x0C138000 },
         { .reg = 0x00009888, .val = 0x0E132000 },
         { .reg = 0x00009888, .val = 0x0413C000 },
         { .reg = 0x00009888, .val = 0x1C140018 },
         { .reg = 0x00009888, .val = 0x0C157000 },
         { .reg = 0x00009888, .val = 0x0E150078 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x04162180 },
         { .reg = 0x00009888, .val = 0x02160000 },
         { .reg = 0x00009888, .val = 0x04174000 },
         { .reg = 0x00009888, .val = 0x0233A000 },
         { .reg = 0x00009888, .val = 0x04333000 },
         { .reg = 0x00009888, .val = 0x14348000 },
         { .reg = 0x00009888, .val = 0x16348000 },
         { .reg = 0x00009888, .val = 0x02357870 },
         { .reg = 0x00009888, .val = 0x10350000 },
         { .reg = 0x00009888, .val = 0x04360043 },
         { .reg = 0x00009888, .val = 0x02360000 },
         { .reg = 0x00009888, .val = 0x04371000 },
         { .reg = 0x00009888, .val = 0x0E538000 },
         { .reg = 0x00009888, .val = 0x00538000 },
         { .reg = 0x00009888, .val = 0x06533000 },
         { .reg = 0x00009888, .val = 0x1C540020 },
         { .reg = 0x00009888, .val = 0x12548000 },
         { .reg = 0x00009888, .val = 0x0E557000 },
         { .reg = 0x00009888, .val = 0x00557800 },
         { .reg = 0x00009888, .val = 0x10550000 },
         { .reg = 0x00009888, .val = 0x06560043 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x06571000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900060 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900842 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "05a06d02-0c12-486b-bb1d-9be053848980";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12120000 },
         { .reg = 0x00009888, .val = 0x12320000 },
         { .reg = 0x00009888, .val = 0x12520000 },
         { .reg = 0x00009888, .val = 0x002F8000 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0015 },
         { .reg = 0x00009888, .val = 0x0C0D8000 },
         { .reg = 0x00009888, .val = 0x0E0DA000 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x100F03A0 },
         { .reg = 0x00009888, .val = 0x0C0FF000 },
         { .reg = 0x00009888, .val = 0x0E0F0095 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x02108000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x02118000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x02121880 },
         { .reg = 0x00009888, .val = 0x041219B5 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x02134000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x0C308000 },
         { .reg = 0x00009888, .val = 0x0E304000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x0C318000 },
         { .reg = 0x00009888, .val = 0x0E314000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x0C321A80 },
         { .reg = 0x00009888, .val = 0x0E320033 },
         { .reg = 0x00009888, .val = 0x06320031 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x0C334000 },
         { .reg = 0x00009888, .val = 0x0E331000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0E508000 },
         { .reg = 0x00009888, .val = 0x00508000 },
         { .reg = 0x00009888, .val = 0x02504000 },
         { .reg = 0x00009888, .val = 0x0E518000 },
         { .reg = 0x00009888, .val = 0x00518000 },
         { .reg = 0x00009888, .val = 0x02514000 },
         { .reg = 0x00009888, .val = 0x0E521880 },
         { .reg = 0x00009888, .val = 0x00521A80 },
         { .reg = 0x00009888, .val = 0x02520033 },
         { .reg = 0x00009888, .val = 0x0E534000 },
         { .reg = 0x00009888, .val = 0x00534000 },
         { .reg = 0x00009888, .val = 0x02531000 },
         { .reg = 0x00009888, .val = 0x1190FF80 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900800 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4B900062 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "cddbd1dc-41a2-4fe4-8b05-3d2773120814";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12124D60 },
         { .reg = 0x00009888, .val = 0x12322E60 },
         { .reg = 0x00009888, .val = 0x12524D60 },
         { .reg = 0x00009888, .val = 0x022F3000 },
         { .reg = 0x00009888, .val = 0x0A4C0014 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0FE000 },
         { .reg = 0x00009888, .val = 0x0E0F0097 },
         { .reg = 0x00009888, .val = 0x082C8000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x002D8000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x0410C000 },
         { .reg = 0x00009888, .val = 0x0411C000 },
         { .reg = 0x00009888, .val = 0x04121FB7 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x04135000 },
         { .reg = 0x00009888, .val = 0x00308000 },
         { .reg = 0x00009888, .val = 0x06304000 },
         { .reg = 0x00009888, .val = 0x00318000 },
         { .reg = 0x00009888, .val = 0x06314000 },
         { .reg = 0x00009888, .val = 0x00321B80 },
         { .reg = 0x00009888, .val = 0x0632003F },
         { .reg = 0x00009888, .val = 0x00334000 },
         { .reg = 0x00009888, .val = 0x06331000 },
         { .reg = 0x00009888, .val = 0x0250C000 },
         { .reg = 0x00009888, .val = 0x0251C000 },
         { .reg = 0x00009888, .val = 0x02521FB7 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x02535000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900800 },
         { .reg = 0x00009888, .val = 0x43900063 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = cflgt3__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "e61ae394-9d9e-4204-a735-1dad7e44d953";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121203E0 },
         { .reg = 0x00009888, .val = 0x123203E0 },
         { .reg = 0x00009888, .val = 0x125203E0 },
         { .reg = 0x00009888, .val = 0x129203E0 },
         { .reg = 0x00009888, .val = 0x12B203E0 },
         { .reg = 0x00009888, .val = 0x12D203E0 },
         { .reg = 0x00009888, .val = 0x024EC000 },
         { .reg = 0x00009888, .val = 0x044EC000 },
         { .reg = 0x00009888, .val = 0x064EC000 },
         { .reg = 0x00009888, .val = 0x022F4000 },
         { .reg = 0x00009888, .val = 0x084CA000 },
         { .reg = 0x00009888, .val = 0x0A4C0042 },
         { .reg = 0x00009888, .val = 0x000D8000 },
         { .reg = 0x00009888, .val = 0x020DA000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x0C0F5000 },
         { .reg = 0x00009888, .val = 0x0E0F006D },
         { .reg = 0x00009888, .val = 0x022C8000 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x062C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x042D8000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x06114000 },
         { .reg = 0x00009888, .val = 0x06120033 },
         { .reg = 0x00009888, .val = 0x00120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04308000 },
         { .reg = 0x00009888, .val = 0x04318000 },
         { .reg = 0x00009888, .val = 0x04321980 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x04334000 },
         { .reg = 0x00009888, .val = 0x04504000 },
         { .reg = 0x00009888, .val = 0x04514000 },
         { .reg = 0x00009888, .val = 0x04520033 },
         { .reg = 0x00009888, .val = 0x00520000 },
         { .reg = 0x00009888, .val = 0x04531000 },
         { .reg = 0x00009888, .val = 0x00AF8000 },
         { .reg = 0x00009888, .val = 0x0ACC0001 },
         { .reg = 0x00009888, .val = 0x008D8000 },
         { .reg = 0x00009888, .val = 0x028DA000 },
         { .reg = 0x00009888, .val = 0x0C8FB000 },
         { .reg = 0x00009888, .val = 0x0E8F0001 },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x02AD4000 },
         { .reg = 0x00009888, .val = 0x02908000 },
         { .reg = 0x00009888, .val = 0x02918000 },
         { .reg = 0x00009888, .val = 0x02921980 },
         { .reg = 0x00009888, .val = 0x00920000 },
         { .reg = 0x00009888, .val = 0x02934000 },
         { .reg = 0x00009888, .val = 0x02B04000 },
         { .reg = 0x00009888, .val = 0x02B14000 },
         { .reg = 0x00009888, .val = 0x02B20033 },
         { .reg = 0x00009888, .val = 0x00B20000 },
         { .reg = 0x00009888, .val = 0x02B31000 },
         { .reg = 0x00009888, .val = 0x00D08000 },
         { .reg = 0x00009888, .val = 0x00D18000 },
         { .reg = 0x00009888, .val = 0x00D21980 },
         { .reg = 0x00009888, .val = 0x00D34000 },
         { .reg = 0x00009888, .val = 0x1190FC00 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x41900C00 },
         { .reg = 0x00009888, .val = 0x43900002 },
         { .reg = 0x00009888, .val = 0x53900420 },
         { .reg = 0x00009888, .val = 0x459000A1 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_vme_pipe_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Media Vme Pipe metrics set";
   query->symbol_name = "VMEPipe";
   query->guid = "94272ad9-45ee-4e34-b7a7-51546cd6405c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A5800 },
         { .reg = 0x00009888, .val = 0x161A00C0 },
         { .reg = 0x00009888, .val = 0x12180240 },
         { .reg = 0x00009888, .val = 0x14180002 },
         { .reg = 0x00009888, .val = 0x149A5800 },
         { .reg = 0x00009888, .val = 0x169A00C0 },
         { .reg = 0x00009888, .val = 0x12980240 },
         { .reg = 0x00009888, .val = 0x14980002 },
         { .reg = 0x00009888, .val = 0x1A4E3FC0 },
         { .reg = 0x00009888, .val = 0x002F1000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F3000 },
         { .reg = 0x00009888, .val = 0x004C4000 },
         { .reg = 0x00009888, .val = 0x0A4C9500 },
         { .reg = 0x00009888, .val = 0x0C4C002A },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x0A0DA000 },
         { .reg = 0x00009888, .val = 0x0C0DA000 },
         { .reg = 0x00009888, .val = 0x0C0F0400 },
         { .reg = 0x00009888, .val = 0x0E0F5500 },
         { .reg = 0x00009888, .val = 0x100F0015 },
         { .reg = 0x00009888, .val = 0x002C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x162CAA00 },
         { .reg = 0x00009888, .val = 0x182C000A },
         { .reg = 0x00009888, .val = 0x04193000 },
         { .reg = 0x00009888, .val = 0x081A28C1 },
         { .reg = 0x00009888, .val = 0x001A0000 },
         { .reg = 0x00009888, .val = 0x00133000 },
         { .reg = 0x00009888, .val = 0x0613C000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x00172000 },
         { .reg = 0x00009888, .val = 0x06178000 },
         { .reg = 0x00009888, .val = 0x0817A000 },
         { .reg = 0x00009888, .val = 0x00180037 },
         { .reg = 0x00009888, .val = 0x06180940 },
         { .reg = 0x00009888, .val = 0x08180000 },
         { .reg = 0x00009888, .val = 0x02180000 },
         { .reg = 0x00009888, .val = 0x04183000 },
         { .reg = 0x00009888, .val = 0x04AFC000 },
         { .reg = 0x00009888, .val = 0x06AF3000 },
         { .reg = 0x00009888, .val = 0x0ACC4000 },
         { .reg = 0x00009888, .val = 0x0CCC0015 },
         { .reg = 0x00009888, .val = 0x0A8DA000 },
         { .reg = 0x00009888, .val = 0x0C8DA000 },
         { .reg = 0x00009888, .val = 0x0E8F4000 },
         { .reg = 0x00009888, .val = 0x108F0015 },
         { .reg = 0x00009888, .val = 0x16ACA000 },
         { .reg = 0x00009888, .val = 0x18AC000A },
         { .reg = 0x00009888, .val = 0x06993000 },
         { .reg = 0x00009888, .val = 0x0C9A28C1 },
         { .reg = 0x00009888, .val = 0x009A0000 },
         { .reg = 0x00009888, .val = 0x0A93F000 },
         { .reg = 0x00009888, .val = 0x0C93F000 },
         { .reg = 0x00009888, .val = 0x0A97A000 },
         { .reg = 0x00009888, .val = 0x0C97A000 },
         { .reg = 0x00009888, .val = 0x0A980977 },
         { .reg = 0x00009888, .val = 0x08980000 },
         { .reg = 0x00009888, .val = 0x04980000 },
         { .reg = 0x00009888, .val = 0x06983000 },
         { .reg = 0x00009888, .val = 0x119000FF },
         { .reg = 0x00009888, .val = 0x51900040 },
         { .reg = 0x00009888, .val = 0x41900020 },
         { .reg = 0x00009888, .val = 0x55900004 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x479008A5 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900002 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00100030 },
         { .reg = 0x00002774, .val = 0x0000FFF9 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFFC },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFF3 },
         { .reg = 0x00002788, .val = 0x00100180 },
         { .reg = 0x0000278C, .val = 0x0000FFCF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00000002 },
         { .reg = 0x0000279C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__vme_pipe__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__vme_pipe__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__vme_pipe__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__vme_pipe__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__vme_pipe__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__vme_pipe__vme_busy__read;
      counter->name = "VME Busy";
      counter->desc = "The percentage of time in which VME (IME or CRE) was actively processing data. Unit: percent.";
      counter->symbol_name = "VMEBusy";
      counter->category = "VME Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness metrics set";
   query->symbol_name = "GpuBusyness";
   query->guid = "22b7e0c2-cade-425f-b099-34479768c72a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 9);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19D05800 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C25 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x19C05800 },
         { .reg = 0x00009888, .val = 0x05D00085 },
         { .reg = 0x00009888, .val = 0x25D00000 },
         { .reg = 0x00009888, .val = 0x09D54000 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x1D810400 },
         { .reg = 0x00009888, .val = 0x21960000 },
         { .reg = 0x00009888, .val = 0x0996C000 },
         { .reg = 0x00009888, .val = 0x0B964000 },
         { .reg = 0x00009888, .val = 0x19938000 },
         { .reg = 0x00009888, .val = 0x1B930068 },
         { .reg = 0x00009888, .val = 0x15948000 },
         { .reg = 0x00009888, .val = 0x1B94000C },
         { .reg = 0x00009888, .val = 0x03957500 },
         { .reg = 0x00009888, .val = 0x1D950000 },
         { .reg = 0x00009888, .val = 0x17950000 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x07928000 },
         { .reg = 0x00009888, .val = 0x03988000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x09978000 },
         { .reg = 0x00009888, .val = 0x05C08500 },
         { .reg = 0x00009888, .val = 0x25C00000 },
         { .reg = 0x00009888, .val = 0x1BC00000 },
         { .reg = 0x00009888, .val = 0x0BC54000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x51900000 },
         { .reg = 0x00009888, .val = 0x43900463 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900040 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
cflgt3_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "f240ef8d-4a2e-4961-a63c-cc43b01ed923";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x11810000 },
         { .reg = 0x00009888, .val = 0x07810013 },
         { .reg = 0x00009888, .val = 0x1F810000 },
         { .reg = 0x00009888, .val = 0x1D810000 },
         { .reg = 0x00009888, .val = 0x1B930040 },
         { .reg = 0x00009888, .val = 0x07E54000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x11900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x00000000 },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x00000000 },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x00000000 },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Builds and publishes the cflgt3 "PMA_Stall" OA metric set.
 *
 * A query descriptor is allocated as a ralloc child of `perf`, filled with
 * the OA report format, the accumulator slot layout, the MUX and B-counter
 * register programming and the list of exposed counters, and is then
 * inserted into perf->oa_metrics_table keyed by the query GUID.
 *
 * The "STC PMA stall" counter is only exposed when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
cflgt3_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs published through query->config.mux_regs. */
   static const struct intel_perf_query_register_prog pma_stall_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x122D3080 },
      { .reg = 0x00009888, .val = 0x000D2000 },
      { .reg = 0x00009888, .val = 0x060D8000 },
      { .reg = 0x00009888, .val = 0x080DA000 },
      { .reg = 0x00009888, .val = 0x0A0DA000 },
      { .reg = 0x00009888, .val = 0x0C0F0800 },
      { .reg = 0x00009888, .val = 0x0E0FAA00 },
      { .reg = 0x00009888, .val = 0x100F0002 },
      { .reg = 0x00009888, .val = 0x002D0025 },
      { .reg = 0x00009888, .val = 0x062D1300 },
      { .reg = 0x00009888, .val = 0x082D16A4 },
      { .reg = 0x00009888, .val = 0x0A2D162E },
      { .reg = 0x00009888, .val = 0x102D0000 },
      { .reg = 0x00009888, .val = 0x1190003F },
      { .reg = 0x00009888, .val = 0x51900000 },
      { .reg = 0x00009888, .val = 0x41900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x45900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   /* Register/value pairs published through query->config.b_counter_regs. */
   static const struct intel_perf_query_register_prog pma_stall_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00E00021 },
      { .reg = 0x00002774, .val = 0x0007FFF8 },
      { .reg = 0x00002778, .val = 0x07000101 },
      { .reg = 0x0000277C, .val = 0x0038FFC7 },
   };

   struct intel_perf_query_info *q = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   q->perf = perf;
   q->kind = INTEL_PERF_QUERY_TYPE_OA;
   q->name = "Metric set PMA Stall";
   q->symbol_name = "PMA_Stall";
   q->guid = "c11af8d1-858b-4f8b-98fb-8d683ba8bda0";

   /* Room for the three unconditional counters plus the optional one. */
   q->counters = rzalloc_array(q, struct intel_perf_query_counter, 4);
   q->n_counters = 0;

   q->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   q->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulator slot layout: 1 GPU time entry, 1 GPU clock entry, 36 A
    * entries, 8 B entries, 8 C entries, 2 perfcnt entries, then rpstat. */
   q->gpu_time_offset = 0;
   q->gpu_clock_offset = q->gpu_time_offset + 1;
   q->a_offset = q->gpu_clock_offset + 1;
   q->b_offset = q->a_offset + 36;
   q->c_offset = q->b_offset + 8;
   q->perfcnt_offset = q->c_offset + 8;
   q->rpstat_offset = q->perfcnt_offset + 2;

   struct intel_perf_query_counter *c = q->counters;

   /* The descriptor is only populated while data_size is still zero.
    * NOTE(review): `q` was just allocated with rzalloc, presumably
    * zero-filled, so this branch is expected to always run - confirm. */
   if (q->data_size == 0) {
      q->config.n_mux_regs = ARRAY_SIZE(pma_stall_mux);
      q->config.mux_regs = pma_stall_mux;

      q->config.n_b_counter_regs = ARRAY_SIZE(pma_stall_b_counters);
      q->config.b_counter_regs = pma_stall_b_counters;

      /* GpuTime: 8-byte slot at offset 0. */
      c = &q->counters[q->n_counters];
      q->n_counters++;
      c->symbol_name = "GpuTime";
      c->name = "GPU Time Elapsed";
      c->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      c->category = "GPU";
      c->units = INTEL_PERF_COUNTER_UNITS_NS;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->type = INTEL_PERF_COUNTER_TYPE_RAW;
      c->raw_max = 0 /* undefined */;
      c->offset = 0;
      c->oa_counter_read_uint64 = cflgt3__pma__stall__gpu_time__read;

      /* GpuCoreClocks */
      c = &q->counters[q->n_counters];
      q->n_counters++;
      c->symbol_name = "GpuCoreClocks";
      c->name = "GPU Core Clocks";
      c->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      c->category = "GPU";
      c->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      c->raw_max = 0 /* undefined */;
      c->offset = 8;
      c->oa_counter_read_uint64 = cflgt3__pma__stall__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here with a computed maximum. */
      c = &q->counters[q->n_counters];
      q->n_counters++;
      c->symbol_name = "AvgGpuCoreFrequency";
      c->name = "AVG GPU Core Frequency";
      c->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      c->category = "GPU";
      c->units = INTEL_PERF_COUNTER_UNITS_HZ;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      c->raw_max = cflgt3__pma__stall__avg_gpu_core_frequency__max(perf);
      c->offset = 16;
      c->oa_counter_read_uint64 = cflgt3__pma__stall__avg_gpu_core_frequency__read;

      /* StcPMAStall: float counter, present only when slice 0 is enabled in
       * the slice mask. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         c = &q->counters[q->n_counters];
         q->n_counters++;
         c->symbol_name = "StcPMAStall";
         c->name = "STC PMA stall";
         c->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         c->category = "GPU/Stencil Cache";
         c->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         c->type = INTEL_PERF_COUNTER_TYPE_RAW;
         c->raw_max = 100.0;
         c->offset = 24;
         c->oa_counter_read_float = cflgt3__pma__stall__stc_pma_stall__read;
      }

      /* Total result size = end of the last counter that was registered. */
      q->data_size = c->offset + intel_perf_query_counter_get_size(c);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, q->guid, q);
}


static void
cflgt3_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute metrics set";
   query->symbol_name = "AsyncCompute";
   query->guid = "0c824ec1-af53-4545-84a3-ede5e3572fc1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00001000 },
         { .reg = 0x0000E658, .val = 0x00051050 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00061060 },
         { .reg = 0x0000E55C, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0x00222222 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = cflgt3__async_compute__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = cflgt3__async_compute__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__cs_fpu0_active__read;
      counter->name = "CS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu0Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__cs_fpu1_active__read;
      counter->name = "CS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpu1Active";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = cflgt3__async_compute__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

/* Public entry point that registers all cflgt3 OA metric sets with `perf`.
 *
 * Calls each per-metric-set registration helper once, in a fixed order,
 * passing `perf` through unchanged. The PMA_Stall and AsyncCompute helpers
 * allocate a query descriptor and insert it into perf->oa_metrics_table keyed
 * by GUID; the remaining helpers are presumably analogous (their bodies are
 * defined earlier in this file).
 *
 * NOTE(review): nothing here guards against repeated invocation on the same
 * `perf`; whether callers guarantee a single call should be confirmed.
 */
void
intel_oa_register_queries_cflgt3(struct intel_perf_config *perf)
{
   cflgt3_register_render_basic_counter_query(perf);
   cflgt3_register_compute_basic_counter_query(perf);
   cflgt3_register_render_pipe_profile_counter_query(perf);
   cflgt3_register_memory_reads_counter_query(perf);
   cflgt3_register_memory_writes_counter_query(perf);
   cflgt3_register_compute_extended_counter_query(perf);
   cflgt3_register_compute_l3_cache_counter_query(perf);
   cflgt3_register_hdc_and_sf_counter_query(perf);
   cflgt3_register_l3_1_counter_query(perf);
   cflgt3_register_l3_2_counter_query(perf);
   cflgt3_register_l3_3_counter_query(perf);
   cflgt3_register_rasterizer_and_pixel_backend_counter_query(perf);
   cflgt3_register_sampler_counter_query(perf);
   cflgt3_register_tdl_1_counter_query(perf);
   cflgt3_register_tdl_2_counter_query(perf);
   cflgt3_register_compute_extra_counter_query(perf);
   cflgt3_register_vme_pipe_counter_query(perf);
   cflgt3_register_gpu_busyness_counter_query(perf);
   cflgt3_register_test_oa_counter_query(perf);
   cflgt3_register_pma__stall_counter_query(perf);
   cflgt3_register_async_compute_counter_query(perf);
}


static void
bxt_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "22b9519a-e9ba-4c41-8b54-f4f8ca14fa0a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      if (perf->sys_vars.revision >= 0x03) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x166C00F0 },
            { .reg = 0x00009888, .val = 0x12120280 },
            { .reg = 0x00009888, .val = 0x12320280 },
            { .reg = 0x00009888, .val = 0x11930317 },
            { .reg = 0x00009888, .val = 0x159303DF },
            { .reg = 0x00009888, .val = 0x3F900C00 },
            { .reg = 0x00009888, .val = 0x419000A0 },
            { .reg = 0x00009888, .val = 0x002D1000 },
            { .reg = 0x00009888, .val = 0x062D4000 },
            { .reg = 0x00009888, .val = 0x082D5000 },
            { .reg = 0x00009888, .val = 0x0A2D1000 },
            { .reg = 0x00009888, .val = 0x0C2E0800 },
            { .reg = 0x00009888, .val = 0x0E2E5900 },
            { .reg = 0x00009888, .val = 0x0A4C8000 },
            { .reg = 0x00009888, .val = 0x0C4C8000 },
            { .reg = 0x00009888, .val = 0x0E4C4000 },
            { .reg = 0x00009888, .val = 0x064E8000 },
            { .reg = 0x00009888, .val = 0x084E8000 },
            { .reg = 0x00009888, .val = 0x0A4E2000 },
            { .reg = 0x00009888, .val = 0x1C4F0010 },
            { .reg = 0x00009888, .val = 0x0A6C0053 },
            { .reg = 0x00009888, .val = 0x106C0000 },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1A0FCC00 },
            { .reg = 0x00009888, .val = 0x1C0F0002 },
            { .reg = 0x00009888, .val = 0x1C2C0040 },
            { .reg = 0x00009888, .val = 0x00101000 },
            { .reg = 0x00009888, .val = 0x04101000 },
            { .reg = 0x00009888, .val = 0x00114000 },
            { .reg = 0x00009888, .val = 0x08114000 },
            { .reg = 0x00009888, .val = 0x00120020 },
            { .reg = 0x00009888, .val = 0x08120021 },
            { .reg = 0x00009888, .val = 0x00141000 },
            { .reg = 0x00009888, .val = 0x08141000 },
            { .reg = 0x00009888, .val = 0x02308000 },
            { .reg = 0x00009888, .val = 0x04302000 },
            { .reg = 0x00009888, .val = 0x06318000 },
            { .reg = 0x00009888, .val = 0x08318000 },
            { .reg = 0x00009888, .val = 0x06320800 },
            { .reg = 0x00009888, .val = 0x08320840 },
            { .reg = 0x00009888, .val = 0x00320000 },
            { .reg = 0x00009888, .val = 0x06344000 },
            { .reg = 0x00009888, .val = 0x08344000 },
            { .reg = 0x00009888, .val = 0x0D931831 },
            { .reg = 0x00009888, .val = 0x0F939F3F },
            { .reg = 0x00009888, .val = 0x01939E80 },
            { .reg = 0x00009888, .val = 0x039303BC },
            { .reg = 0x00009888, .val = 0x0593000E },
            { .reg = 0x00009888, .val = 0x1993002A },
            { .reg = 0x00009888, .val = 0x07930000 },
            { .reg = 0x00009888, .val = 0x09930000 },
            { .reg = 0x00009888, .val = 0x1D900177 },
            { .reg = 0x00009888, .val = 0x1F900187 },
            { .reg = 0x00009888, .val = 0x35900000 },
            { .reg = 0x00009888, .val = 0x13904000 },
            { .reg = 0x00009888, .val = 0x21904000 },
            { .reg = 0x00009888, .val = 0x23904000 },
            { .reg = 0x00009888, .val = 0x25904000 },
            { .reg = 0x00009888, .val = 0x27904000 },
            { .reg = 0x00009888, .val = 0x2B904000 },
            { .reg = 0x00009888, .val = 0x2D904000 },
            { .reg = 0x00009888, .val = 0x2F904000 },
            { .reg = 0x00009888, .val = 0x31904000 },
            { .reg = 0x00009888, .val = 0x15904000 },
            { .reg = 0x00009888, .val = 0x17904000 },
            { .reg = 0x00009888, .val = 0x19904000 },
            { .reg = 0x00009888, .val = 0x1B904000 },
            { .reg = 0x00009888, .val = 0x53901110 },
            { .reg = 0x00009888, .val = 0x43900423 },
            { .reg = 0x00009888, .val = 0x55900111 },
            { .reg = 0x00009888, .val = 0x47900C02 },
            { .reg = 0x00009888, .val = 0x57900000 },
            { .reg = 0x00009888, .val = 0x49900020 },
            { .reg = 0x00009888, .val = 0x59901111 },
            { .reg = 0x00009888, .val = 0x4B900421 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4D900001 },
            { .reg = 0x00009888, .val = 0x45900821 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "012d72cf-82a9-4d25-8ddf-74076fd30797";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F900C00 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x002D5000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D4000 },
         { .reg = 0x00009888, .val = 0x0A2D1000 },
         { .reg = 0x00009888, .val = 0x0C2D5000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x0C2E1400 },
         { .reg = 0x00009888, .val = 0x0E2E5100 },
         { .reg = 0x00009888, .val = 0x102E0114 },
         { .reg = 0x00009888, .val = 0x044CC000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4C8000 },
         { .reg = 0x00009888, .val = 0x0E4C4000 },
         { .reg = 0x00009888, .val = 0x104C8000 },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x004EA000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E2000 },
         { .reg = 0x00009888, .val = 0x0C4EA000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x004F6B42 },
         { .reg = 0x00009888, .val = 0x064F6200 },
         { .reg = 0x00009888, .val = 0x084F4100 },
         { .reg = 0x00009888, .val = 0x0A4F0061 },
         { .reg = 0x00009888, .val = 0x0C4F6C4C },
         { .reg = 0x00009888, .val = 0x0E4F4B00 },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x1C4F0000 },
         { .reg = 0x00009888, .val = 0x180F5000 },
         { .reg = 0x00009888, .val = 0x1A0F8800 },
         { .reg = 0x00009888, .val = 0x1C0F08A2 },
         { .reg = 0x00009888, .val = 0x182C4000 },
         { .reg = 0x00009888, .val = 0x1C2C1451 },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x1A2C0010 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x19938A28 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x19900177 },
         { .reg = 0x00009888, .val = 0x1B900178 },
         { .reg = 0x00009888, .val = 0x1D900125 },
         { .reg = 0x00009888, .val = 0x1F900123 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x53901000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "RenderPipeProfile" OA metric set.
 *
 * Allocates a query description owned by `perf`, fills in its identity
 * (name, symbol name, GUID), the OA report format, the accumulation buffer
 * offsets, the three register programming tables (mux, B-counter, flex) and
 * 43 counter records, then inserts the query into perf->oa_metrics_table
 * keyed by its GUID string.
 *
 * perf: configuration that owns the new query (used as the ralloc parent)
 *       and whose oa_metrics_table receives it.
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are used without a
 * NULL check -- confirm that matches the allocation policy of the callers.
 */
static void
bxt_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "ce416533-e49e-4211-80af-ec513590a914";
   /* 43 slots: exactly the number of counter records appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each section starts where the previous one ends: 1 GPU time slot,
    * 1 GPU clock slot, 36 A slots, 8 B slots, 8 C slots, 2 perfcnt slots.
    * Presumably 36/8/8 mirror the A32+A4/B8/C8 layout named by the OA
    * format above -- TODO confirm against the format definition. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was allocated a few lines above and data_size has
    * not been assigned since, so this guard presumably always passes
    * (assuming rzalloc returns zeroed memory -- confirm). */
   if (!query->data_size) {
      /* Register/value pairs stored by pointer in query->config; the table
       * has static storage, so the pointer stays valid after this function
       * returns. Entry order is preserved as generated. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C2E001F },
         { .reg = 0x00009888, .val = 0x0A2F0000 },
         { .reg = 0x00009888, .val = 0x10186800 },
         { .reg = 0x00009888, .val = 0x11810019 },
         { .reg = 0x00009888, .val = 0x15810013 },
         { .reg = 0x00009888, .val = 0x13820020 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x17840000 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x21860000 },
         { .reg = 0x00009888, .val = 0x178703E0 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x022E5400 },
         { .reg = 0x00009888, .val = 0x002E0000 },
         { .reg = 0x00009888, .val = 0x0E2E0080 },
         { .reg = 0x00009888, .val = 0x082F0040 },
         { .reg = 0x00009888, .val = 0x002F0000 },
         { .reg = 0x00009888, .val = 0x06143000 },
         { .reg = 0x00009888, .val = 0x06174000 },
         { .reg = 0x00009888, .val = 0x06180012 },
         { .reg = 0x00009888, .val = 0x00180000 },
         { .reg = 0x00009888, .val = 0x0D804000 },
         { .reg = 0x00009888, .val = 0x0F804000 },
         { .reg = 0x00009888, .val = 0x05804000 },
         { .reg = 0x00009888, .val = 0x09810200 },
         { .reg = 0x00009888, .val = 0x0B810030 },
         { .reg = 0x00009888, .val = 0x03810003 },
         { .reg = 0x00009888, .val = 0x21819140 },
         { .reg = 0x00009888, .val = 0x23819050 },
         { .reg = 0x00009888, .val = 0x25810018 },
         { .reg = 0x00009888, .val = 0x0B820980 },
         { .reg = 0x00009888, .val = 0x03820D80 },
         { .reg = 0x00009888, .val = 0x11820000 },
         { .reg = 0x00009888, .val = 0x0182C000 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x09824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0D830004 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x0F831000 },
         { .reg = 0x00009888, .val = 0x01848072 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x07848000 },
         { .reg = 0x00009888, .val = 0x09844000 },
         { .reg = 0x00009888, .val = 0x0F848000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x09860092 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x01869100 },
         { .reg = 0x00009888, .val = 0x0F870065 },
         { .reg = 0x00009888, .val = 0x01870000 },
         { .reg = 0x00009888, .val = 0x19930800 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x1B952000 },
         { .reg = 0x00009888, .val = 0x1D955055 },
         { .reg = 0x00009888, .val = 0x1F951455 },
         { .reg = 0x00009888, .val = 0x0992A000 },
         { .reg = 0x00009888, .val = 0x0F928000 },
         { .reg = 0x00009888, .val = 0x1192A800 },
         { .reg = 0x00009888, .val = 0x1392028A },
         { .reg = 0x00009888, .val = 0x0B92A000 },
         { .reg = 0x00009888, .val = 0x0D922000 },
         { .reg = 0x00009888, .val = 0x13908000 },
         { .reg = 0x00009888, .val = 0x21908000 },
         { .reg = 0x00009888, .val = 0x23908000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27908000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C01 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900863 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900061 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900C22 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Second register table, attached to the query the same way. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Third register table, attached to the query the same way. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter records. Each one claims the next free slot through
       * n_counters++, installs a reader callback (oa_counter_read_uint64 or
       * oa_counter_read_float, matching data_type), fills in the descriptive
       * metadata and records the counter's byte offset in the result data.
       * Offsets advance by 8 after a uint64 counter and by 4 after a float
       * counter; the single float at offset 24 is followed by a uint64 at
       * offset 32, leaving bytes 28..31 unused. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* Only counter in this set whose raw_max is computed by a helper call
       * (taking perf) at registration time instead of being a literal. */
      counter->raw_max = bxt__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      /* From here to the end every counter is a float percentage, packed at
       * consecutive 4-byte offsets (208..268), with raw_max left at 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      /* NOTE(review): the description below says "hull stall pipeline
       * stage"; the sibling counters say "<name> shader pipeline stage", so
       * this looks like a typo for "hull shader". The string is left
       * untouched here because this file is marked as autogenerated by
       * gen_perf.py -- any correction presumably belongs in the generator's
       * input. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      /* `counter` still points at the last record (offset 268), so the
       * total result size is that offset plus the size reported for it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metric set";
   query->symbol_name = "MemoryReads";
   query->guid = "398e2452-18d7-42d0-b241-e4d0a9148ada";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19800343 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x41900003 },
         { .reg = 0x00009888, .val = 0x03803180 },
         { .reg = 0x00009888, .val = 0x058035E2 },
         { .reg = 0x00009888, .val = 0x0780006A },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x2181A000 },
         { .reg = 0x00009888, .val = 0x2381000A },
         { .reg = 0x00009888, .val = 0x1D950550 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D92A000 },
         { .reg = 0x00009888, .val = 0x0F922000 },
         { .reg = 0x00009888, .val = 0x13900170 },
         { .reg = 0x00009888, .val = 0x21900171 },
         { .reg = 0x00009888, .val = 0x23900172 },
         { .reg = 0x00009888, .val = 0x25900173 },
         { .reg = 0x00009888, .val = 0x27900174 },
         { .reg = 0x00009888, .val = 0x29900175 },
         { .reg = 0x00009888, .val = 0x2B900176 },
         { .reg = 0x00009888, .val = 0x2D900177 },
         { .reg = 0x00009888, .val = 0x2F90017F },
         { .reg = 0x00009888, .val = 0x31900125 },
         { .reg = 0x00009888, .val = 0x15900123 },
         { .reg = 0x00009888, .val = 0x17900121 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47901080 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49901084 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B901084 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900004 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metric set";
   query->symbol_name = "MemoryWrites";
   query->guid = "d324a0d6-7269-4847-a5c2-6f71ddc7fed5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19800343 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F900000 },
         { .reg = 0x00009888, .val = 0x41900080 },
         { .reg = 0x00009888, .val = 0x03803180 },
         { .reg = 0x00009888, .val = 0x058035E2 },
         { .reg = 0x00009888, .val = 0x0780006A },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x2181A000 },
         { .reg = 0x00009888, .val = 0x2381000A },
         { .reg = 0x00009888, .val = 0x1D950550 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D92A000 },
         { .reg = 0x00009888, .val = 0x0F922000 },
         { .reg = 0x00009888, .val = 0x13900180 },
         { .reg = 0x00009888, .val = 0x21900181 },
         { .reg = 0x00009888, .val = 0x23900182 },
         { .reg = 0x00009888, .val = 0x25900183 },
         { .reg = 0x00009888, .val = 0x27900184 },
         { .reg = 0x00009888, .val = 0x29900185 },
         { .reg = 0x00009888, .val = 0x2B900186 },
         { .reg = 0x00009888, .val = 0x2D900187 },
         { .reg = 0x00009888, .val = 0x2F900170 },
         { .reg = 0x00009888, .val = 0x31900125 },
         { .reg = 0x00009888, .val = 0x15900123 },
         { .reg = 0x00009888, .val = 0x17900121 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47901080 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49901084 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B901084 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900004 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended metric set";
   query->symbol_name = "ComputeExtended";
   query->guid = "caf3596a-7bb1-4dec-b3b3-2a080d283b49";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x141C0160 },
         { .reg = 0x00009888, .val = 0x161C0015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x002D5000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D5000 },
         { .reg = 0x00009888, .val = 0x0A2D5000 },
         { .reg = 0x00009888, .val = 0x0C2D5000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x0C2E5400 },
         { .reg = 0x00009888, .val = 0x0E2E5515 },
         { .reg = 0x00009888, .val = 0x102E0155 },
         { .reg = 0x00009888, .val = 0x044CC000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4CC000 },
         { .reg = 0x00009888, .val = 0x0E4CC000 },
         { .reg = 0x00009888, .val = 0x104C8000 },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x004EA000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084EA000 },
         { .reg = 0x00009888, .val = 0x0A4EA000 },
         { .reg = 0x00009888, .val = 0x0C4EA000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x0E4F4B41 },
         { .reg = 0x00009888, .val = 0x004F4200 },
         { .reg = 0x00009888, .val = 0x024F404C },
         { .reg = 0x00009888, .val = 0x1C4F0000 },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0A1BC000 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x001C0031 },
         { .reg = 0x00009888, .val = 0x061C1900 },
         { .reg = 0x00009888, .val = 0x081C1A33 },
         { .reg = 0x00009888, .val = 0x0A1C1B35 },
         { .reg = 0x00009888, .val = 0x0C1C3337 },
         { .reg = 0x00009888, .val = 0x041C31C7 },
         { .reg = 0x00009888, .val = 0x180F5000 },
         { .reg = 0x00009888, .val = 0x1A0FA8AA },
         { .reg = 0x00009888, .val = 0x1C0F0AAA },
         { .reg = 0x00009888, .val = 0x182C8000 },
         { .reg = 0x00009888, .val = 0x1C2C6AAA },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x1A2C2950 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x1993AAAA },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900420 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900400 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x45900001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache metric set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "49b956e2-d5b9-47e0-9d8a-cee5e8cec527";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C03B0 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900C00 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x002D1000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D5000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x0C2E0400 },
         { .reg = 0x00009888, .val = 0x0E2E1500 },
         { .reg = 0x00009888, .val = 0x102E0140 },
         { .reg = 0x00009888, .val = 0x044C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4CC000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x004E2000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084EA000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x1A4F4001 },
         { .reg = 0x00009888, .val = 0x1C4F5005 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x180F1000 },
         { .reg = 0x00009888, .val = 0x1A0FA800 },
         { .reg = 0x00009888, .val = 0x1C0F0A00 },
         { .reg = 0x00009888, .val = 0x182C4000 },
         { .reg = 0x00009888, .val = 0x1C2C4015 },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x03931980 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x1993A00A },
         { .reg = 0x00009888, .val = 0x07930000 },
         { .reg = 0x00009888, .val = 0x09930000 },
         { .reg = 0x00009888, .val = 0x1D900177 },
         { .reg = 0x00009888, .val = 0x1F900178 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x53901000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = bxt__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "ad9d9bc2-ccb7-4f5d-adbb-b4e67e615efd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

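   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so a query only needs to be described
    * once. The query is allocated above with rzalloc() rather than kept in
    * a global variable, so the data_size check below presumably always
    * passes for a freshly allocated query (assuming rzalloc() zero-fills
    * the structure)... */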

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x022D4000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0E2E0055 },
         { .reg = 0x00009888, .val = 0x064C8000 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x024F6100 },
         { .reg = 0x00009888, .val = 0x044F416B },
         { .reg = 0x00009888, .val = 0x064F004B },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x1A0F02A8 },
         { .reg = 0x00009888, .val = 0x1A2C5500 },
         { .reg = 0x00009888, .val = 0x0F808000 },
         { .reg = 0x00009888, .val = 0x25810020 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x1F951000 },
         { .reg = 0x00009888, .val = 0x13920200 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4D900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "bde7b434-349c-4aad-8596-eb23c0352168";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

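   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query only once.
    * NOTE(review): the original wording referred to a "global variable",
    * but query is allocated with rzalloc() above; confirm whether the
    * data_size check below can ever be false. */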

   if (!query->data_size) {
      if (perf->sys_vars.revision >= 0x03) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x000091BC, .val = 0xE0500000 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x12643400 },
            { .reg = 0x00009888, .val = 0x12653400 },
            { .reg = 0x00009888, .val = 0x106C6800 },
            { .reg = 0x00009888, .val = 0x126C001E },
            { .reg = 0x00009888, .val = 0x166C0010 },
            { .reg = 0x00009888, .val = 0x0C2D5000 },
            { .reg = 0x00009888, .val = 0x0E2D5000 },
            { .reg = 0x00009888, .val = 0x002D4000 },
            { .reg = 0x00009888, .val = 0x022D5000 },
            { .reg = 0x00009888, .val = 0x042D5000 },
            { .reg = 0x00009888, .val = 0x062D1000 },
            { .reg = 0x00009888, .val = 0x102E0154 },
            { .reg = 0x00009888, .val = 0x0C2E5000 },
            { .reg = 0x00009888, .val = 0x0E2E0055 },
            { .reg = 0x00009888, .val = 0x104C8000 },
            { .reg = 0x00009888, .val = 0x124C8000 },
            { .reg = 0x00009888, .val = 0x144C8000 },
            { .reg = 0x00009888, .val = 0x164C2000 },
            { .reg = 0x00009888, .val = 0x044C8000 },
            { .reg = 0x00009888, .val = 0x064CC000 },
            { .reg = 0x00009888, .val = 0x084CC000 },
            { .reg = 0x00009888, .val = 0x0A4C4000 },
            { .reg = 0x00009888, .val = 0x0C4EA000 },
            { .reg = 0x00009888, .val = 0x0E4EA000 },
            { .reg = 0x00009888, .val = 0x004E8000 },
            { .reg = 0x00009888, .val = 0x024EA000 },
            { .reg = 0x00009888, .val = 0x044EA000 },
            { .reg = 0x00009888, .val = 0x064E2000 },
            { .reg = 0x00009888, .val = 0x1C4F5500 },
            { .reg = 0x00009888, .val = 0x1A4F1554 },
            { .reg = 0x00009888, .val = 0x0A640024 },
            { .reg = 0x00009888, .val = 0x10640000 },
            { .reg = 0x00009888, .val = 0x04640000 },
            { .reg = 0x00009888, .val = 0x0C650024 },
            { .reg = 0x00009888, .val = 0x10650000 },
            { .reg = 0x00009888, .val = 0x06650000 },
            { .reg = 0x00009888, .val = 0x0C6C5327 },
            { .reg = 0x00009888, .val = 0x0E6C5425 },
            { .reg = 0x00009888, .val = 0x006C2A00 },
            { .reg = 0x00009888, .val = 0x026C285B },
            { .reg = 0x00009888, .val = 0x046C005C },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1A6C0900 },
            { .reg = 0x00009888, .val = 0x1C0F0AA0 },
            { .reg = 0x00009888, .val = 0x180F4000 },
            { .reg = 0x00009888, .val = 0x1A0F02AA },
            { .reg = 0x00009888, .val = 0x1C2C5400 },
            { .reg = 0x00009888, .val = 0x1E2C0001 },
            { .reg = 0x00009888, .val = 0x1A2C5550 },
            { .reg = 0x00009888, .val = 0x1993AA00 },
            { .reg = 0x00009888, .val = 0x03938000 },
            { .reg = 0x00009888, .val = 0x05938000 },
            { .reg = 0x00009888, .val = 0x07938000 },
            { .reg = 0x00009888, .val = 0x09938000 },
            { .reg = 0x00009888, .val = 0x0B938000 },
            { .reg = 0x00009888, .val = 0x0D938000 },
            { .reg = 0x00009888, .val = 0x2B904000 },
            { .reg = 0x00009888, .val = 0x2D904000 },
            { .reg = 0x00009888, .val = 0x2F904000 },
            { .reg = 0x00009888, .val = 0x31904000 },
            { .reg = 0x00009888, .val = 0x15904000 },
            { .reg = 0x00009888, .val = 0x17904000 },
            { .reg = 0x00009888, .val = 0x19904000 },
            { .reg = 0x00009888, .val = 0x1B904000 },
            { .reg = 0x00009888, .val = 0x1D904000 },
            { .reg = 0x00009888, .val = 0x1F904000 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x4B900421 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4D900001 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009888, .val = 0x43900420 },
            { .reg = 0x00009888, .val = 0x45900021 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x47900000 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      if (perf->sys_vars.revision < 0x03) {
         static const struct intel_perf_query_register_prog mux_regs[] = {
            { .reg = 0x000091BC, .val = 0xE0500000 },
            { .reg = 0x00009840, .val = 0x00000080 },
            { .reg = 0x00009888, .val = 0x14640340 },
            { .reg = 0x00009888, .val = 0x14650340 },
            { .reg = 0x00009888, .val = 0x106C6800 },
            { .reg = 0x00009888, .val = 0x126C001E },
            { .reg = 0x00009888, .val = 0x166C0010 },
            { .reg = 0x00009888, .val = 0x0C2D5000 },
            { .reg = 0x00009888, .val = 0x0E2D5000 },
            { .reg = 0x00009888, .val = 0x002D4000 },
            { .reg = 0x00009888, .val = 0x022D5000 },
            { .reg = 0x00009888, .val = 0x042D5000 },
            { .reg = 0x00009888, .val = 0x062D1000 },
            { .reg = 0x00009888, .val = 0x102E0154 },
            { .reg = 0x00009888, .val = 0x0C2E5000 },
            { .reg = 0x00009888, .val = 0x0E2E0055 },
            { .reg = 0x00009888, .val = 0x104C8000 },
            { .reg = 0x00009888, .val = 0x124C8000 },
            { .reg = 0x00009888, .val = 0x144C8000 },
            { .reg = 0x00009888, .val = 0x164C2000 },
            { .reg = 0x00009888, .val = 0x044C8000 },
            { .reg = 0x00009888, .val = 0x064CC000 },
            { .reg = 0x00009888, .val = 0x084CC000 },
            { .reg = 0x00009888, .val = 0x0A4C4000 },
            { .reg = 0x00009888, .val = 0x0C4EA000 },
            { .reg = 0x00009888, .val = 0x0E4EA000 },
            { .reg = 0x00009888, .val = 0x004E8000 },
            { .reg = 0x00009888, .val = 0x024EA000 },
            { .reg = 0x00009888, .val = 0x044EA000 },
            { .reg = 0x00009888, .val = 0x064E2000 },
            { .reg = 0x00009888, .val = 0x1C4F5500 },
            { .reg = 0x00009888, .val = 0x1A4F1554 },
            { .reg = 0x00009888, .val = 0x04642400 },
            { .reg = 0x00009888, .val = 0x22640000 },
            { .reg = 0x00009888, .val = 0x1A640000 },
            { .reg = 0x00009888, .val = 0x06650024 },
            { .reg = 0x00009888, .val = 0x22650000 },
            { .reg = 0x00009888, .val = 0x1C650000 },
            { .reg = 0x00009888, .val = 0x0C6C5327 },
            { .reg = 0x00009888, .val = 0x0E6C5425 },
            { .reg = 0x00009888, .val = 0x006C2A00 },
            { .reg = 0x00009888, .val = 0x026C285B },
            { .reg = 0x00009888, .val = 0x046C005C },
            { .reg = 0x00009888, .val = 0x1C6C0000 },
            { .reg = 0x00009888, .val = 0x1A6C0900 },
            { .reg = 0x00009888, .val = 0x1C0F0AA0 },
            { .reg = 0x00009888, .val = 0x180F4000 },
            { .reg = 0x00009888, .val = 0x1A0F02AA },
            { .reg = 0x00009888, .val = 0x1C2C5400 },
            { .reg = 0x00009888, .val = 0x1E2C0001 },
            { .reg = 0x00009888, .val = 0x1A2C5550 },
            { .reg = 0x00009888, .val = 0x1993AA00 },
            { .reg = 0x00009888, .val = 0x03938000 },
            { .reg = 0x00009888, .val = 0x05938000 },
            { .reg = 0x00009888, .val = 0x07938000 },
            { .reg = 0x00009888, .val = 0x09938000 },
            { .reg = 0x00009888, .val = 0x0B938000 },
            { .reg = 0x00009888, .val = 0x0D938000 },
            { .reg = 0x00009888, .val = 0x2B904000 },
            { .reg = 0x00009888, .val = 0x2D904000 },
            { .reg = 0x00009888, .val = 0x2F904000 },
            { .reg = 0x00009888, .val = 0x31904000 },
            { .reg = 0x00009888, .val = 0x15904000 },
            { .reg = 0x00009888, .val = 0x17904000 },
            { .reg = 0x00009888, .val = 0x19904000 },
            { .reg = 0x00009888, .val = 0x1B904000 },
            { .reg = 0x00009888, .val = 0x1D904000 },
            { .reg = 0x00009888, .val = 0x1F904000 },
            { .reg = 0x00009888, .val = 0x59900000 },
            { .reg = 0x00009888, .val = 0x4B900421 },
            { .reg = 0x00009888, .val = 0x37900000 },
            { .reg = 0x00009888, .val = 0x33900000 },
            { .reg = 0x00009888, .val = 0x4D900001 },
            { .reg = 0x00009888, .val = 0x53900000 },
            { .reg = 0x00009888, .val = 0x43900420 },
            { .reg = 0x00009888, .val = 0x45900021 },
            { .reg = 0x00009888, .val = 0x55900000 },
            { .reg = 0x00009888, .val = 0x47900000 },
         };
         query->config.mux_regs = mux_regs;
         query->config.n_mux_regs = ARRAY_SIZE(mux_regs);
      }

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "f3d845e6-bfc3-4ffe-b0e5-dddc80c21f07";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

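   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query once and
    * only initialize it the first time through.
    *
    * NOTE(review): this comment speaks of a "global variable", but the
    * query above is allocated with rzalloc(), so data_size is presumably
    * always 0 when the check below runs -- confirm whether the guard is
    * still needed. */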

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102D7800 },
         { .reg = 0x00009888, .val = 0x122D79E0 },
         { .reg = 0x00009888, .val = 0x0C2F0004 },
         { .reg = 0x00009888, .val = 0x100E3800 },
         { .reg = 0x00009888, .val = 0x180F0005 },
         { .reg = 0x00009888, .val = 0x002D0940 },
         { .reg = 0x00009888, .val = 0x022D802F },
         { .reg = 0x00009888, .val = 0x042D4013 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0E2E0050 },
         { .reg = 0x00009888, .val = 0x022F0010 },
         { .reg = 0x00009888, .val = 0x002F0000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x040E0480 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x060F0027 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x1A0F0040 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x439014A0 },
         { .reg = 0x00009888, .val = 0x459000A4 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "a53d620b-c14e-49df-bd27-e076b3fb98f3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

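   /* Note: we assume the definition of a query cannot vary between
    * contexts, so it only needs to be described once. The query itself is
    * allocated with rzalloc() above rather than stored in a global
    * variable, so (assuming rzalloc() returns zeroed memory) data_size is
    * still 0 here and the initialization below runs on every call. */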

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121300A0 },
         { .reg = 0x00009888, .val = 0x141600AB },
         { .reg = 0x00009888, .val = 0x123300A0 },
         { .reg = 0x00009888, .val = 0x143600AB },
         { .reg = 0x00009888, .val = 0x125300A0 },
         { .reg = 0x00009888, .val = 0x145600AB },
         { .reg = 0x00009888, .val = 0x0C2D4000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x102E01A0 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E0065 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x084C4000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x044E2000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x1C0F0800 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F023F },
         { .reg = 0x00009888, .val = 0x1E2C0003 },
         { .reg = 0x00009888, .val = 0x1A2CC030 },
         { .reg = 0x00009888, .val = 0x04132180 },
         { .reg = 0x00009888, .val = 0x02130000 },
         { .reg = 0x00009888, .val = 0x0C148000 },
         { .reg = 0x00009888, .val = 0x0E142000 },
         { .reg = 0x00009888, .val = 0x04148000 },
         { .reg = 0x00009888, .val = 0x1E150140 },
         { .reg = 0x00009888, .val = 0x1C150040 },
         { .reg = 0x00009888, .val = 0x0C163000 },
         { .reg = 0x00009888, .val = 0x0E160068 },
         { .reg = 0x00009888, .val = 0x10160000 },
         { .reg = 0x00009888, .val = 0x18160000 },
         { .reg = 0x00009888, .val = 0x0A164000 },
         { .reg = 0x00009888, .val = 0x04330043 },
         { .reg = 0x00009888, .val = 0x02330000 },
         { .reg = 0x00009888, .val = 0x0234A000 },
         { .reg = 0x00009888, .val = 0x04342000 },
         { .reg = 0x00009888, .val = 0x1C350015 },
         { .reg = 0x00009888, .val = 0x02363460 },
         { .reg = 0x00009888, .val = 0x10360000 },
         { .reg = 0x00009888, .val = 0x04360000 },
         { .reg = 0x00009888, .val = 0x06360000 },
         { .reg = 0x00009888, .val = 0x08364000 },
         { .reg = 0x00009888, .val = 0x06530043 },
         { .reg = 0x00009888, .val = 0x02530000 },
         { .reg = 0x00009888, .val = 0x0E548000 },
         { .reg = 0x00009888, .val = 0x00548000 },
         { .reg = 0x00009888, .val = 0x06542000 },
         { .reg = 0x00009888, .val = 0x1E550400 },
         { .reg = 0x00009888, .val = 0x1A552000 },
         { .reg = 0x00009888, .val = 0x1C550100 },
         { .reg = 0x00009888, .val = 0x0E563000 },
         { .reg = 0x00009888, .val = 0x00563400 },
         { .reg = 0x00009888, .val = 0x10560000 },
         { .reg = 0x00009888, .val = 0x18560000 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x0C564000 },
         { .reg = 0x00009888, .val = 0x1993A800 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B9014A0 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900820 },
         { .reg = 0x00009888, .val = 0x45901022 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "8a44b172-6e1d-445f-8937-c06cc487ba39";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

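   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query within a
    * global variable which only needs to be initialized once.
    *
    * NOTE(review): the "global variable" wording looks stale. The query
    * above is freshly allocated with rzalloc() on every call, so the
    * data_size guard below is presumably always taken. Confirm, and fix
    * in the generator rather than in this generated file. */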

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A0000 },
         { .reg = 0x00009888, .val = 0x143A0000 },
         { .reg = 0x00009888, .val = 0x145A0000 },
         { .reg = 0x00009888, .val = 0x0C2D4000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x102E0150 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E006A },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064C4000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024E2000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x1C0F0BC0 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F0302 },
         { .reg = 0x00009888, .val = 0x1E2C0003 },
         { .reg = 0x00009888, .val = 0x1A2C00F0 },
         { .reg = 0x00009888, .val = 0x021A3080 },
         { .reg = 0x00009888, .val = 0x041A31E5 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x0414A000 },
         { .reg = 0x00009888, .val = 0x1C150054 },
         { .reg = 0x00009888, .val = 0x06168000 },
         { .reg = 0x00009888, .val = 0x08168000 },
         { .reg = 0x00009888, .val = 0x0A168000 },
         { .reg = 0x00009888, .val = 0x0C3A3280 },
         { .reg = 0x00009888, .val = 0x0E3A0063 },
         { .reg = 0x00009888, .val = 0x063A0061 },
         { .reg = 0x00009888, .val = 0x023A0000 },
         { .reg = 0x00009888, .val = 0x0C348000 },
         { .reg = 0x00009888, .val = 0x0E342000 },
         { .reg = 0x00009888, .val = 0x06342000 },
         { .reg = 0x00009888, .val = 0x1E350140 },
         { .reg = 0x00009888, .val = 0x1C350100 },
         { .reg = 0x00009888, .val = 0x18360028 },
         { .reg = 0x00009888, .val = 0x0C368000 },
         { .reg = 0x00009888, .val = 0x0E5A3080 },
         { .reg = 0x00009888, .val = 0x005A3280 },
         { .reg = 0x00009888, .val = 0x025A0063 },
         { .reg = 0x00009888, .val = 0x0E548000 },
         { .reg = 0x00009888, .val = 0x00548000 },
         { .reg = 0x00009888, .val = 0x02542000 },
         { .reg = 0x00009888, .val = 0x1E550400 },
         { .reg = 0x00009888, .val = 0x1A552000 },
         { .reg = 0x00009888, .val = 0x1C550001 },
         { .reg = 0x00009888, .val = 0x18560080 },
         { .reg = 0x00009888, .val = 0x02568000 },
         { .reg = 0x00009888, .val = 0x04568000 },
         { .reg = 0x00009888, .val = 0x1993A800 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x45901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "9d4643f8-aa05-482e-8193-070a8ab0d117";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A026B },
         { .reg = 0x00009888, .val = 0x143A0173 },
         { .reg = 0x00009888, .val = 0x145A026B },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E0069 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x180F6000 },
         { .reg = 0x00009888, .val = 0x1A0F030A },
         { .reg = 0x00009888, .val = 0x1A2C03C0 },
         { .reg = 0x00009888, .val = 0x041A37E7 },
         { .reg = 0x00009888, .val = 0x021A0000 },
         { .reg = 0x00009888, .val = 0x0414A000 },
         { .reg = 0x00009888, .val = 0x1C150050 },
         { .reg = 0x00009888, .val = 0x08168000 },
         { .reg = 0x00009888, .val = 0x0A168000 },
         { .reg = 0x00009888, .val = 0x003A3380 },
         { .reg = 0x00009888, .val = 0x063A006F },
         { .reg = 0x00009888, .val = 0x023A0000 },
         { .reg = 0x00009888, .val = 0x00348000 },
         { .reg = 0x00009888, .val = 0x06342000 },
         { .reg = 0x00009888, .val = 0x1A352000 },
         { .reg = 0x00009888, .val = 0x1C350100 },
         { .reg = 0x00009888, .val = 0x02368000 },
         { .reg = 0x00009888, .val = 0x0C368000 },
         { .reg = 0x00009888, .val = 0x025A37E7 },
         { .reg = 0x00009888, .val = 0x0254A000 },
         { .reg = 0x00009888, .val = 0x1C550005 },
         { .reg = 0x00009888, .val = 0x04568000 },
         { .reg = 0x00009888, .val = 0x06568000 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900020 },
         { .reg = 0x00009888, .val = 0x45901080 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = bxt__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "fa6ecf21-2cb8-4d0b-9308-6e4a7b4ca87a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A001F },
         { .reg = 0x00009888, .val = 0x143A001F },
         { .reg = 0x00009888, .val = 0x145A001F },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0E2E0094 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x1A0F00E0 },
         { .reg = 0x00009888, .val = 0x1A2C0C00 },
         { .reg = 0x00009888, .val = 0x061A0063 },
         { .reg = 0x00009888, .val = 0x021A0000 },
         { .reg = 0x00009888, .val = 0x06142000 },
         { .reg = 0x00009888, .val = 0x1C150100 },
         { .reg = 0x00009888, .val = 0x0C168000 },
         { .reg = 0x00009888, .val = 0x043A3180 },
         { .reg = 0x00009888, .val = 0x023A0000 },
         { .reg = 0x00009888, .val = 0x04348000 },
         { .reg = 0x00009888, .val = 0x1C350040 },
         { .reg = 0x00009888, .val = 0x0A368000 },
         { .reg = 0x00009888, .val = 0x045A0063 },
         { .reg = 0x00009888, .val = 0x025A0000 },
         { .reg = 0x00009888, .val = 0x04542000 },
         { .reg = 0x00009888, .val = 0x1C550010 },
         { .reg = 0x00009888, .val = 0x08568000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
         { .reg = 0x00009888, .val = 0x47900004 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00001000 },
         { .reg = 0x0000E558, .val = 0x00003002 },
         { .reg = 0x0000E658, .val = 0x00005004 },
         { .reg = 0x0000E758, .val = 0x00011010 },
         { .reg = 0x0000E45C, .val = 0x00050012 },
         { .reg = 0x0000E55C, .val = 0x00052051 },
         { .reg = 0x0000E65C, .val = 0x00000008 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extra__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extra__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__compute_extra__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__compute_extra__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extra__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__compute_extra__fpu1_active_adjusted__read;
      counter->name = "EU FPU1 Pipe Active including Ext Math";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      counter->symbol_name = "Fpu1ActiveAdjusted";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
bxt_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness metrics set";
   query->symbol_name = "GpuBusyness";
   query->guid = "c9f5fa3a-d14f-400c-a89a-211206b00ee7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 8);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x13805800 },
         { .reg = 0x00009888, .val = 0x05962C00 },
         { .reg = 0x00009888, .val = 0x19950016 },
         { .reg = 0x00009888, .val = 0x21C05800 },
         { .reg = 0x00009888, .val = 0x07800035 },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x23810008 },
         { .reg = 0x00009888, .val = 0x07960025 },
         { .reg = 0x00009888, .val = 0x1D960000 },
         { .reg = 0x00009888, .val = 0x0F960000 },
         { .reg = 0x00009888, .val = 0x0B934000 },
         { .reg = 0x00009888, .val = 0x09948000 },
         { .reg = 0x00009888, .val = 0x05950085 },
         { .reg = 0x00009888, .val = 0x11950000 },
         { .reg = 0x00009888, .val = 0x1D950400 },
         { .reg = 0x00009888, .val = 0x0B924000 },
         { .reg = 0x00009888, .val = 0x0D922000 },
         { .reg = 0x00009888, .val = 0x0F922000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x0BC000A5 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900442 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00078000 },
         { .reg = 0x00002774, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = bxt__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = bxt__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = bxt__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the Broxton "TestOa" (MDAPI testing set) OA metric set.
 *
 * A query description is allocated with rzalloc() using \p perf as the ralloc
 * context, filled with the OA report format, the accumulation-buffer layout,
 * the MUX and B-counter register programming and twelve counters, and finally
 * inserted into perf->oa_metrics_table under its GUID string.
 *
 * \param perf  perf configuration that receives the new metric set
 */
static void
bxt_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are constant data; the query only
    * stores a pointer to them together with their element count.
    */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x19800000 },
      { .reg = 0x00009888, .val = 0x07800063 },
      { .reg = 0x00009888, .val = 0x11800000 },
      { .reg = 0x00009888, .val = 0x23810008 },
      { .reg = 0x00009888, .val = 0x1D950400 },
      { .reg = 0x00009888, .val = 0x0F922000 },
      { .reg = 0x00009888, .val = 0x1F908000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002770, .val = 0x00000004 },
      { .reg = 0x00002774, .val = 0x00000000 },
      { .reg = 0x00002778, .val = 0x00000003 },
      { .reg = 0x0000277C, .val = 0x00000000 },
      { .reg = 0x00002780, .val = 0x00000007 },
      { .reg = 0x00002784, .val = 0x00000000 },
      { .reg = 0x00002788, .val = 0x00100002 },
      { .reg = 0x0000278C, .val = 0x0000FFF7 },
      { .reg = 0x00002790, .val = 0x00100002 },
      { .reg = 0x00002794, .val = 0x0000FFCF },
      { .reg = 0x00002798, .val = 0x00100082 },
      { .reg = 0x0000279C, .val = 0x0000FFEF },
      { .reg = 0x000027A0, .val = 0x001000C2 },
      { .reg = 0x000027A4, .val = 0x0000FFE7 },
      { .reg = 0x000027A8, .val = 0x00100001 },
      { .reg = 0x000027AC, .val = 0x0000FFE7 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "MDAPI testing set";
   query->symbol_name = "TestOa";
   query->guid = "5ee72f5c-092f-421e-8b70-225f7c3e9612";

   /* Room for the twelve counters described below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* filled in at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each group of slots starts right after
    * the previous one.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The set is only described while data_size is still unset; data_size is
    * assigned at the very end of this block.
    */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here with a computed maximum. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->offset = 16;
      ctr->raw_max = bxt__test_oa__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = bxt__test_oa__avg_gpu_core_frequency__read;

      /* Counter0 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter0";
      ctr->name = "TestCounter0";
      ctr->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 24;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter0__read;

      /* Counter1 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter1";
      ctr->name = "TestCounter1";
      ctr->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 32;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter1__read;

      /* Counter2 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter2";
      ctr->name = "TestCounter2";
      ctr->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 40;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter2__read;

      /* Counter3 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter3";
      ctr->name = "TestCounter3";
      ctr->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 48;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter3__read;

      /* Counter4 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter4";
      ctr->name = "TestCounter4";
      ctr->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 56;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter4__read;

      /* Counter5 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter5";
      ctr->name = "TestCounter5";
      ctr->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 64;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter5__read;

      /* Counter6 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter6";
      ctr->name = "TestCounter6";
      ctr->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 72;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter6__read;

      /* Counter7 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter7";
      ctr->name = "TestCounter7";
      ctr->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 80;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter7__read;

      /* Counter8 */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Counter8";
      ctr->name = "TestCounter8";
      ctr->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->offset = 88;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__test_oa__counter8__read;

      /* Result size: end of the counter that was added last. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the Broxton "PMA_Stall" (Metric set PMA Stall) OA metric set.
 *
 * A query description is allocated with rzalloc() using \p perf as the ralloc
 * context, filled with the OA report format, the accumulation-buffer layout,
 * the MUX and B-counter register programming and its counters, and finally
 * inserted into perf->oa_metrics_table under its GUID string.
 *
 * Three counters are always registered; the StcPMAStall counter is added
 * only when bit 0 of perf->sys_vars.slice_mask is set.
 *
 * \param perf  perf configuration that receives the new metric set
 */
static void
bxt_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are constant data; the query only
    * stores a pointer to them together with their element count.
    */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x124C3080 },
      { .reg = 0x00009888, .val = 0x002D1000 },
      { .reg = 0x00009888, .val = 0x062D4000 },
      { .reg = 0x00009888, .val = 0x082D5000 },
      { .reg = 0x00009888, .val = 0x0A2D5000 },
      { .reg = 0x00009888, .val = 0x0C2E0400 },
      { .reg = 0x00009888, .val = 0x0E2E5500 },
      { .reg = 0x00009888, .val = 0x102E0001 },
      { .reg = 0x00009888, .val = 0x004C0045 },
      { .reg = 0x00009888, .val = 0x064C2300 },
      { .reg = 0x00009888, .val = 0x084C26C4 },
      { .reg = 0x00009888, .val = 0x0A4C264E },
      { .reg = 0x00009888, .val = 0x164C0000 },
      { .reg = 0x00009888, .val = 0x044C0000 },
      { .reg = 0x00009888, .val = 0x0C4C0000 },
      { .reg = 0x00009888, .val = 0x0E4C0000 },
      { .reg = 0x00009888, .val = 0x01938000 },
      { .reg = 0x00009888, .val = 0x0F938000 },
      { .reg = 0x00009888, .val = 0x199300AA },
      { .reg = 0x00009888, .val = 0x13904000 },
      { .reg = 0x00009888, .val = 0x21904000 },
      { .reg = 0x00009888, .val = 0x23904000 },
      { .reg = 0x00009888, .val = 0x25904000 },
      { .reg = 0x00009888, .val = 0x27904000 },
      { .reg = 0x00009888, .val = 0x29904000 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x43900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00E00021 },
      { .reg = 0x00002774, .val = 0x0007FFF8 },
      { .reg = 0x00002778, .val = 0x07000101 },
      { .reg = 0x0000277C, .val = 0x0038FFC7 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set PMA Stall";
   query->symbol_name = "PMA_Stall";
   query->guid = "d49cd0d8-8c7f-4465-94fc-51e08c9050bc";

   /* Room for the three fixed counters plus the optional one. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 4);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* filled in at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each group of slots starts right after
    * the previous one.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The set is only described while data_size is still unset; data_size is
    * assigned at the very end of this block.
    */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__pma__stall__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__pma__stall__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here with a computed maximum. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->offset = 16;
      ctr->raw_max = bxt__pma__stall__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = bxt__pma__stall__avg_gpu_core_frequency__read;

      /* StcPMAStall: registered only when bit 0 of the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "StcPMAStall";
         ctr->name = "STC PMA stall";
         ctr->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         ctr->category = "GPU/Stencil Cache";
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->offset = 24;
         ctr->raw_max = 100.0;
         ctr->oa_counter_read_float = bxt__pma__stall__stc_pma_stall__read;
      }

      /* Result size: end of whichever counter was added last, so it also
       * holds when the optional counter above was skipped.
       */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the Broxton "AsyncCompute" OA metric set.
 *
 * A query description is allocated with rzalloc() using \p perf as the ralloc
 * context, filled with the OA report format, the accumulation-buffer layout,
 * the B-counter and flex register programming and 21 counters, and finally
 * inserted into perf->oa_metrics_table under its GUID string.
 *
 * \param perf  perf configuration that receives the new metric set
 */
static void
bxt_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are constant data; the query only
    * stores a pointer to them together with their element count. This set
    * installs no MUX programming.
    */
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };
   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00001000 },
      { .reg = 0x0000E658, .val = 0x00051050 },
      { .reg = 0x0000E758, .val = 0x00011010 },
      { .reg = 0x0000E45C, .val = 0x00061060 },
      { .reg = 0x0000E55C, .val = 0x00000008 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "AsyncCompute metrics set";
   query->symbol_name = "AsyncCompute";
   query->guid = "b890cb09-42f7-4644-8aac-a7beca4b9181";

   /* Room for the 21 counters described below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* filled in at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each group of slots starts right after
    * the previous one.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The set is only described while data_size is still unset; data_size is
    * assigned at the very end of this block.
    */
   if (query->data_size == 0) {
      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      query->config.flex_regs = flex_prog;
      query->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->offset = 0;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->offset = 8;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here with a computed maximum. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->offset = 16;
      ctr->raw_max = bxt__async_compute__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = bxt__async_compute__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 24;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__gpu_busy__read;

      /* VsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 32;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__vs_threads__read;

      /* HsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 40;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__hs_threads__read;

      /* DsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 48;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__ds_threads__read;

      /* GsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 56;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__gs_threads__read;

      /* PsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 64;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__ps_threads__read;

      /* CsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->offset = 72;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = bxt__async_compute__cs_threads__read;

      /* From here on every counter is a float percentage, laid out at
       * consecutive 4-byte offsets.
       */

      /* Fpu0Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Fpu0Active";
      ctr->name = "EU FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 80;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__fpu0_active__read;

      /* VsFpu0Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 84;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__vs_fpu0_active__read;

      /* PsFpu0Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 88;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__ps_fpu0_active__read;

      /* CsFpu0Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsFpu0Active";
      ctr->name = "CS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 92;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__cs_fpu0_active__read;

      /* Fpu1Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "Fpu1Active";
      ctr->name = "EU FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 96;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__fpu1_active__read;

      /* VsFpu1Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 100;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__vs_fpu1_active__read;

      /* PsFpu1Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 104;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__ps_fpu1_active__read;

      /* CsFpu1Active */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsFpu1Active";
      ctr->name = "CS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 108;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__cs_fpu1_active__read;

      /* EuThreadOccupancy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 112;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__eu_thread_occupancy__read;

      /* EuActive */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 116;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__eu_active__read;

      /* EuStall */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->offset = 120;
      ctr->raw_max = 100.0;
      ctr->oa_counter_read_float = bxt__async_compute__eu_stall__read;

      /* Result size: end of the counter that was added last. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_bxt(struct intel_perf_config *perf)
{
   bxt_register_render_basic_counter_query(perf);
   bxt_register_compute_basic_counter_query(perf);
   bxt_register_render_pipe_profile_counter_query(perf);
   bxt_register_memory_reads_counter_query(perf);
   bxt_register_memory_writes_counter_query(perf);
   bxt_register_compute_extended_counter_query(perf);
   bxt_register_compute_l3_cache_counter_query(perf);
   bxt_register_hdc_and_sf_counter_query(perf);
   bxt_register_l3_1_counter_query(perf);
   bxt_register_rasterizer_and_pixel_backend_counter_query(perf);
   bxt_register_sampler_counter_query(perf);
   bxt_register_tdl_1_counter_query(perf);
   bxt_register_tdl_2_counter_query(perf);
   bxt_register_compute_extra_counter_query(perf);
   bxt_register_gpu_busyness_counter_query(perf);
   bxt_register_test_oa_counter_query(perf);
   bxt_register_pma__stall_counter_query(perf);
   bxt_register_async_compute_counter_query(perf);
}


static void
glk_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "d72df5c7-5b4a-4274-a43f-00b0fd51fc68";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 52);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C00F0 },
         { .reg = 0x00009888, .val = 0x12120280 },
         { .reg = 0x00009888, .val = 0x12320280 },
         { .reg = 0x00009888, .val = 0x11930317 },
         { .reg = 0x00009888, .val = 0x159303DF },
         { .reg = 0x00009888, .val = 0x3F900C00 },
         { .reg = 0x00009888, .val = 0x419000A0 },
         { .reg = 0x00009888, .val = 0x002D1000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D5000 },
         { .reg = 0x00009888, .val = 0x0A2D1000 },
         { .reg = 0x00009888, .val = 0x0C2E0800 },
         { .reg = 0x00009888, .val = 0x0E2E5900 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4C8000 },
         { .reg = 0x00009888, .val = 0x0E4C4000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E2000 },
         { .reg = 0x00009888, .val = 0x1C4F0010 },
         { .reg = 0x00009888, .val = 0x0A6C0053 },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1A0FCC00 },
         { .reg = 0x00009888, .val = 0x1C0F0002 },
         { .reg = 0x00009888, .val = 0x1C2C0040 },
         { .reg = 0x00009888, .val = 0x00101000 },
         { .reg = 0x00009888, .val = 0x04101000 },
         { .reg = 0x00009888, .val = 0x00114000 },
         { .reg = 0x00009888, .val = 0x08114000 },
         { .reg = 0x00009888, .val = 0x00120020 },
         { .reg = 0x00009888, .val = 0x08120021 },
         { .reg = 0x00009888, .val = 0x00141000 },
         { .reg = 0x00009888, .val = 0x08141000 },
         { .reg = 0x00009888, .val = 0x02308000 },
         { .reg = 0x00009888, .val = 0x04302000 },
         { .reg = 0x00009888, .val = 0x06318000 },
         { .reg = 0x00009888, .val = 0x08318000 },
         { .reg = 0x00009888, .val = 0x06320800 },
         { .reg = 0x00009888, .val = 0x08320840 },
         { .reg = 0x00009888, .val = 0x00320000 },
         { .reg = 0x00009888, .val = 0x06344000 },
         { .reg = 0x00009888, .val = 0x08344000 },
         { .reg = 0x00009888, .val = 0x0D931831 },
         { .reg = 0x00009888, .val = 0x0F939F3F },
         { .reg = 0x00009888, .val = 0x01939E80 },
         { .reg = 0x00009888, .val = 0x039303BC },
         { .reg = 0x00009888, .val = 0x0593000E },
         { .reg = 0x00009888, .val = 0x1993002A },
         { .reg = 0x00009888, .val = 0x07930000 },
         { .reg = 0x00009888, .val = 0x09930000 },
         { .reg = 0x00009888, .val = 0x1D900177 },
         { .reg = 0x00009888, .val = 0x1F900187 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x53901110 },
         { .reg = 0x00009888, .val = 0x43900423 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x47900C02 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900020 },
         { .reg = 0x00009888, .val = 0x59901111 },
         { .reg = 0x00009888, .val = 0x4B900421 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x45900821 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__render_basic__sampler0_busy__read;
         counter->name = "Sampler 0 Busy";
         counter->desc = "The percentage of time in which Sampler 0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__render_basic__sampler1_busy__read;
         counter->name = "Sampler 1 Busy";
         counter->desc = "The percentage of time in which Sampler 1 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__samplers_busy__read;
      counter->name = "Samplers Busy";
      counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplersBusy";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      if (perf->sys_vars.subslice_mask & 0x09) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__render_basic__sampler0_bottleneck__read;
         counter->name = "Sampler 0 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler0Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 0x12) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__render_basic__sampler1_bottleneck__read;
         counter->name = "Sampler 1 Bottleneck";
         counter->desc = "The percentage of time in which Sampler 1 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler1Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_vf_throughput__read;
      counter->name = "GTI Fixed Pipe Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between 3D Pipeline (Command Dispatch, Input Assembly and Stream Output) and GTI. Unit: bytes.";
      counter->symbol_name = "GtiVfThroughput";
      counter->category = "GTI/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_depth_throughput__read;
      counter->name = "GTI Depth Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between depth caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiDepthThroughput";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_rcc_throughput__read;
      counter->name = "GTI RCC Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between render color caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiRccThroughput";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 312;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_hdc_lookups_throughput__read;
      counter->name = "GTI HDC TLB Lookup Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between GTI and HDC, when HDC is doing TLB lookups. Unit: bytes.";
      counter->symbol_name = "GtiHdcLookupsThroughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 320;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_basic__sampler_bottleneck__read;
      counter->name = "Samplers Bottleneck";
      counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
      counter->symbol_name = "SamplerBottleneck";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 344;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "814285f6-354d-41d2-ba49-e24e622714a0";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x124F1C00 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F900C00 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x002D5000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D4000 },
         { .reg = 0x00009888, .val = 0x0A2D1000 },
         { .reg = 0x00009888, .val = 0x0C2D5000 },
         { .reg = 0x00009888, .val = 0x0E2D4000 },
         { .reg = 0x00009888, .val = 0x0C2E1400 },
         { .reg = 0x00009888, .val = 0x0E2E5100 },
         { .reg = 0x00009888, .val = 0x102E0114 },
         { .reg = 0x00009888, .val = 0x044CC000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4C8000 },
         { .reg = 0x00009888, .val = 0x0E4C4000 },
         { .reg = 0x00009888, .val = 0x104C8000 },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x004EA000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084E8000 },
         { .reg = 0x00009888, .val = 0x0A4E2000 },
         { .reg = 0x00009888, .val = 0x0C4EA000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x004F6B42 },
         { .reg = 0x00009888, .val = 0x064F6200 },
         { .reg = 0x00009888, .val = 0x084F4100 },
         { .reg = 0x00009888, .val = 0x0A4F0061 },
         { .reg = 0x00009888, .val = 0x0C4F6C4C },
         { .reg = 0x00009888, .val = 0x0E4F4B00 },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x1C4F0000 },
         { .reg = 0x00009888, .val = 0x180F5000 },
         { .reg = 0x00009888, .val = 0x1A0F8800 },
         { .reg = 0x00009888, .val = 0x1C0F08A2 },
         { .reg = 0x00009888, .val = 0x182C4000 },
         { .reg = 0x00009888, .val = 0x1C2C1451 },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x1A2C0010 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x19938A28 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x19900177 },
         { .reg = 0x00009888, .val = 0x1B900178 },
         { .reg = 0x00009888, .val = 0x1D900125 },
         { .reg = 0x00009888, .val = 0x1F900123 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x53901000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "07d397a6-b3e6-49f6-9433-a4f293d55978";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x0C2E001F },
         { .reg = 0x00009888, .val = 0x0A2F0000 },
         { .reg = 0x00009888, .val = 0x10186800 },
         { .reg = 0x00009888, .val = 0x11810019 },
         { .reg = 0x00009888, .val = 0x15810013 },
         { .reg = 0x00009888, .val = 0x13820020 },
         { .reg = 0x00009888, .val = 0x11830020 },
         { .reg = 0x00009888, .val = 0x17840000 },
         { .reg = 0x00009888, .val = 0x11860007 },
         { .reg = 0x00009888, .val = 0x21860000 },
         { .reg = 0x00009888, .val = 0x178703E0 },
         { .reg = 0x00009888, .val = 0x0C2D8000 },
         { .reg = 0x00009888, .val = 0x042D4000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x022E5400 },
         { .reg = 0x00009888, .val = 0x002E0000 },
         { .reg = 0x00009888, .val = 0x0E2E0080 },
         { .reg = 0x00009888, .val = 0x082F0040 },
         { .reg = 0x00009888, .val = 0x002F0000 },
         { .reg = 0x00009888, .val = 0x06143000 },
         { .reg = 0x00009888, .val = 0x06174000 },
         { .reg = 0x00009888, .val = 0x06180012 },
         { .reg = 0x00009888, .val = 0x00180000 },
         { .reg = 0x00009888, .val = 0x0D804000 },
         { .reg = 0x00009888, .val = 0x0F804000 },
         { .reg = 0x00009888, .val = 0x05804000 },
         { .reg = 0x00009888, .val = 0x09810200 },
         { .reg = 0x00009888, .val = 0x0B810030 },
         { .reg = 0x00009888, .val = 0x03810003 },
         { .reg = 0x00009888, .val = 0x21819140 },
         { .reg = 0x00009888, .val = 0x23819050 },
         { .reg = 0x00009888, .val = 0x25810018 },
         { .reg = 0x00009888, .val = 0x0B820980 },
         { .reg = 0x00009888, .val = 0x03820D80 },
         { .reg = 0x00009888, .val = 0x11820000 },
         { .reg = 0x00009888, .val = 0x0182C000 },
         { .reg = 0x00009888, .val = 0x07828000 },
         { .reg = 0x00009888, .val = 0x09824000 },
         { .reg = 0x00009888, .val = 0x0F828000 },
         { .reg = 0x00009888, .val = 0x0D830004 },
         { .reg = 0x00009888, .val = 0x0583000C },
         { .reg = 0x00009888, .val = 0x0F831000 },
         { .reg = 0x00009888, .val = 0x01848072 },
         { .reg = 0x00009888, .val = 0x11840000 },
         { .reg = 0x00009888, .val = 0x07848000 },
         { .reg = 0x00009888, .val = 0x09844000 },
         { .reg = 0x00009888, .val = 0x0F848000 },
         { .reg = 0x00009888, .val = 0x07860000 },
         { .reg = 0x00009888, .val = 0x09860092 },
         { .reg = 0x00009888, .val = 0x0F860400 },
         { .reg = 0x00009888, .val = 0x01869100 },
         { .reg = 0x00009888, .val = 0x0F870065 },
         { .reg = 0x00009888, .val = 0x01870000 },
         { .reg = 0x00009888, .val = 0x19930800 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x1B952000 },
         { .reg = 0x00009888, .val = 0x1D955055 },
         { .reg = 0x00009888, .val = 0x1F951455 },
         { .reg = 0x00009888, .val = 0x0992A000 },
         { .reg = 0x00009888, .val = 0x0F928000 },
         { .reg = 0x00009888, .val = 0x1192A800 },
         { .reg = 0x00009888, .val = 0x1392028A },
         { .reg = 0x00009888, .val = 0x0B92A000 },
         { .reg = 0x00009888, .val = 0x0D922000 },
         { .reg = 0x00009888, .val = 0x13908000 },
         { .reg = 0x00009888, .val = 0x21908000 },
         { .reg = 0x00009888, .val = 0x23908000 },
         { .reg = 0x00009888, .val = 0x25908000 },
         { .reg = 0x00009888, .val = 0x27908000 },
         { .reg = 0x00009888, .val = 0x29908000 },
         { .reg = 0x00009888, .val = 0x2B908000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F908000 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x15908000 },
         { .reg = 0x00009888, .val = 0x17908000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900C01 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900863 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900061 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900C22 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFEA },
         { .reg = 0x00002774, .val = 0x00007FFC },
         { .reg = 0x00002778, .val = 0x0007AFFA },
         { .reg = 0x0000277C, .val = 0x0000F5FD },
         { .reg = 0x00002780, .val = 0x00079FFA },
         { .reg = 0x00002784, .val = 0x0000F3FB },
         { .reg = 0x00002788, .val = 0x0007BF7A },
         { .reg = 0x0000278C, .val = 0x0000F7E7 },
         { .reg = 0x00002790, .val = 0x0007FEFA },
         { .reg = 0x00002794, .val = 0x0000F7CF },
         { .reg = 0x00002798, .val = 0x00077FFA },
         { .reg = 0x0000279C, .val = 0x0000EFDF },
         { .reg = 0x000027A0, .val = 0x0006FFFA },
         { .reg = 0x000027A4, .val = 0x0000CFBF },
         { .reg = 0x000027A8, .val = 0x0003FFFA },
         { .reg = 0x000027AC, .val = 0x00005F7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_memory_reads_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Reads Distribution metrics set";
   query->symbol_name = "MemoryReads";
   query->guid = "1a356946-5428-450b-a2f0-89f8783a302d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19800343 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F901000 },
         { .reg = 0x00009888, .val = 0x41900003 },
         { .reg = 0x00009888, .val = 0x03803180 },
         { .reg = 0x00009888, .val = 0x058035E2 },
         { .reg = 0x00009888, .val = 0x0780006A },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x2181A000 },
         { .reg = 0x00009888, .val = 0x2381000A },
         { .reg = 0x00009888, .val = 0x1D950550 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D92A000 },
         { .reg = 0x00009888, .val = 0x0F922000 },
         { .reg = 0x00009888, .val = 0x13900170 },
         { .reg = 0x00009888, .val = 0x21900171 },
         { .reg = 0x00009888, .val = 0x23900172 },
         { .reg = 0x00009888, .val = 0x25900173 },
         { .reg = 0x00009888, .val = 0x27900174 },
         { .reg = 0x00009888, .val = 0x29900175 },
         { .reg = 0x00009888, .val = 0x2B900176 },
         { .reg = 0x00009888, .val = 0x2D900177 },
         { .reg = 0x00009888, .val = 0x2F90017F },
         { .reg = 0x00009888, .val = 0x31900125 },
         { .reg = 0x00009888, .val = 0x15900123 },
         { .reg = 0x00009888, .val = 0x17900121 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47901080 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49901084 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B901084 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900004 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F872 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__memory_reads__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_reads__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_reads__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_reads__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_cmd_streamer_memory_reads__read;
      counter->name = "GtiCmdStreamerMemoryReads";
      counter->desc = "The total number of GTI memory reads from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryReads";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_rs_memory_reads__read;
      counter->name = "GtiRsMemoryReads";
      counter->desc = "The total number of GTI memory reads from Resource Streamer. Unit: messages.";
      counter->symbol_name = "GtiRsMemoryReads";
      counter->category = "GTI/3D Pipe/Resource Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_vf_memory_reads__read;
      counter->name = "GtiVfMemoryReads";
      counter->desc = "The total number of GTI memory reads from Vertex Fetch. Unit: messages.";
      counter->symbol_name = "GtiVfMemoryReads";
      counter->category = "GTI/3D Pipe/Vertex Fetch";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_rcc_memory_reads__read;
      counter->name = "GtiRccMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Color Cache (Render Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_msc_memory_reads__read;
      counter->name = "GtiMscMemoryReads";
      counter->desc = "The total number of GTI memory reads from Multisampling Color Cache (Multisampling Color Cache misses). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryReads";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_hiz_memory_reads__read;
      counter->name = "GtiHizMemoryReads";
      counter->desc = "The total number of GTI memory reads from Hierarchical Depth Cache (Hi-Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiHizMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_stc_memory_reads__read;
      counter->name = "GtiStcMemoryReads";
      counter->desc = "The total number of GTI memory reads from Stencil Cache (Stencil Cache misses). Unit: messages.";
      counter->symbol_name = "GtiStcMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_rcz_memory_reads__read;
      counter->name = "GtiRczMemoryReads";
      counter->desc = "The total number of GTI memory reads from Render Depth Cache (Render Depth Cache misses). Unit: messages.";
      counter->symbol_name = "GtiRczMemoryReads";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_memory_reads__read;
      counter->name = "GtiMemoryReads";
      counter->desc = "The total number of GTI memory reads. Unit: messages.";
      counter->symbol_name = "GtiMemoryReads";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_l3_bank0_reads__read;
      counter->name = "GtiL3Bank0Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 0 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_l3_bank1_reads__read;
      counter->name = "GtiL3Bank1Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 1 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_l3_bank2_reads__read;
      counter->name = "GtiL3Bank2Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 2 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_l3_bank3_reads__read;
      counter->name = "GtiL3Bank3Reads";
      counter->desc = "The total number of GTI memory reads from L3 Bank 3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_l3_reads__read;
      counter->name = "GtiL3Reads";
      counter->desc = "The total number of GTI memory reads from L3 (L3 Cache misses). Unit: messages.";
      counter->symbol_name = "GtiL3Reads";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_reads__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all accesses from GTI to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_memory_writes_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Memory Writes Distribution metrics set";
   query->symbol_name = "MemoryWrites";
   query->guid = "5299be9d-7a61-4c99-9f81-f87e6c5aaca9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x19800343 },
         { .reg = 0x00009888, .val = 0x39900340 },
         { .reg = 0x00009888, .val = 0x3F900000 },
         { .reg = 0x00009888, .val = 0x41900080 },
         { .reg = 0x00009888, .val = 0x03803180 },
         { .reg = 0x00009888, .val = 0x058035E2 },
         { .reg = 0x00009888, .val = 0x0780006A },
         { .reg = 0x00009888, .val = 0x11800000 },
         { .reg = 0x00009888, .val = 0x2181A000 },
         { .reg = 0x00009888, .val = 0x2381000A },
         { .reg = 0x00009888, .val = 0x1D950550 },
         { .reg = 0x00009888, .val = 0x0B928000 },
         { .reg = 0x00009888, .val = 0x0D92A000 },
         { .reg = 0x00009888, .val = 0x0F922000 },
         { .reg = 0x00009888, .val = 0x13900180 },
         { .reg = 0x00009888, .val = 0x21900181 },
         { .reg = 0x00009888, .val = 0x23900182 },
         { .reg = 0x00009888, .val = 0x25900183 },
         { .reg = 0x00009888, .val = 0x27900184 },
         { .reg = 0x00009888, .val = 0x29900185 },
         { .reg = 0x00009888, .val = 0x2B900186 },
         { .reg = 0x00009888, .val = 0x2D900187 },
         { .reg = 0x00009888, .val = 0x2F900170 },
         { .reg = 0x00009888, .val = 0x31900125 },
         { .reg = 0x00009888, .val = 0x15900123 },
         { .reg = 0x00009888, .val = 0x17900121 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x19908000 },
         { .reg = 0x00009888, .val = 0x1B908000 },
         { .reg = 0x00009888, .val = 0x1D908000 },
         { .reg = 0x00009888, .val = 0x1F908000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47901080 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49901084 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B901084 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900004 },
         { .reg = 0x00009888, .val = 0x45900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000272C, .val = 0xFFFFFFFF },
         { .reg = 0x00002728, .val = 0xFFFFFFFF },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x0000271C, .val = 0xFFFFFFFF },
         { .reg = 0x00002718, .val = 0xFFFFFFFF },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x0000274C, .val = 0x86543210 },
         { .reg = 0x00002748, .val = 0x86543210 },
         { .reg = 0x00002744, .val = 0x00006667 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x0000275C, .val = 0x86543210 },
         { .reg = 0x00002758, .val = 0x86543210 },
         { .reg = 0x00002754, .val = 0x00006465 },
         { .reg = 0x00002750, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007F81A },
         { .reg = 0x00002774, .val = 0x0000FE00 },
         { .reg = 0x00002778, .val = 0x0007F82A },
         { .reg = 0x0000277C, .val = 0x0000FE00 },
         { .reg = 0x00002780, .val = 0x0007F822 },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x0007F8BA },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x0007F87A },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x0007F8EA },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x0007F8E2 },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x0007F8F2 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00015014 },
         { .reg = 0x0000E658, .val = 0x00025024 },
         { .reg = 0x0000E758, .val = 0x00035034 },
         { .reg = 0x0000E45C, .val = 0x00045044 },
         { .reg = 0x0000E55C, .val = 0x00055054 },
         { .reg = 0x0000E65C, .val = 0x00065064 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__memory_writes__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_writes__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_writes__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__memory_writes__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_cmd_streamer_memory_writes__read;
      counter->name = "GtiCmdStreamerMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Command Streamer. Unit: messages.";
      counter->symbol_name = "GtiCmdStreamerMemoryWrites";
      counter->category = "GTI/3D Pipe/Command Streamer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_so_memory_writes__read;
      counter->name = "GtiSoMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stream Output. Unit: messages.";
      counter->symbol_name = "GtiSoMemoryWrites";
      counter->category = "GTI/3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_rcc_memory_writes__read;
      counter->name = "GtiRccMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Color Cache (Render Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiRccMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_msc_memory_writes__read;
      counter->name = "GtiMscMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Multisampling Color Cache (Multisampling Color Cache invalidations). Unit: messages.";
      counter->symbol_name = "GtiMscMemoryWrites";
      counter->category = "GTI/Color Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_hiz_memory_writes__read;
      counter->name = "GtiHizMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Hierarchical Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiHizMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_stc_memory_writes__read;
      counter->name = "GtiStcMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Stencil Cache. Unit: messages.";
      counter->symbol_name = "GtiStcMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_rcz_memory_writes__read;
      counter->name = "GtiRczMemoryWrites";
      counter->desc = "The total number of GTI memory writes from Render Depth Cache. Unit: messages.";
      counter->symbol_name = "GtiRczMemoryWrites";
      counter->category = "GTI/Depth Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_memory_writes__read;
      counter->name = "GtiMemoryWrites";
      counter->desc = "The total number of GTI memory writes. Unit: messages.";
      counter->symbol_name = "GtiMemoryWrites";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_l3_bank0_writes__read;
      counter->name = "GtiL3Bank0Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 0 (L3 Bank 0 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank0Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_l3_bank1_writes__read;
      counter->name = "GtiL3Bank1Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 1 (L3 Bank 1 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank1Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_l3_bank2_writes__read;
      counter->name = "GtiL3Bank2Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 2 (L3 Bank 2 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank2Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_l3_bank3_writes__read;
      counter->name = "GtiL3Bank3Writes";
      counter->desc = "The total number of GTI memory writes from L3 Bank 3 (L3 Bank 3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Bank3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_l3_writes__read;
      counter->name = "GtiL3Writes";
      counter->desc = "The total number of GTI memory writes from L3 (L3 invalidations). Unit: messages.";
      counter->symbol_name = "GtiL3Writes";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 304;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__memory_writes__gti_ring_accesses__read;
      counter->name = "GtiRingAccesses";
      counter->desc = "The total number of all GTI accesses to the ring. Unit: messages.";
      counter->symbol_name = "GtiRingAccesses";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 312;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extended set";
   query->symbol_name = "ComputeExtended";
   query->guid = "bc9bcff2-459a-4cbc-986d-a84b077153f3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F00E0 },
         { .reg = 0x00009888, .val = 0x141C0160 },
         { .reg = 0x00009888, .val = 0x161C0015 },
         { .reg = 0x00009888, .val = 0x181C0120 },
         { .reg = 0x00009888, .val = 0x002D5000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D5000 },
         { .reg = 0x00009888, .val = 0x0A2D5000 },
         { .reg = 0x00009888, .val = 0x0C2D5000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x0C2E5400 },
         { .reg = 0x00009888, .val = 0x0E2E5515 },
         { .reg = 0x00009888, .val = 0x102E0155 },
         { .reg = 0x00009888, .val = 0x044CC000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4CC000 },
         { .reg = 0x00009888, .val = 0x0E4CC000 },
         { .reg = 0x00009888, .val = 0x104C8000 },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x004EA000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084EA000 },
         { .reg = 0x00009888, .val = 0x0A4EA000 },
         { .reg = 0x00009888, .val = 0x0C4EA000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x0E4F4B41 },
         { .reg = 0x00009888, .val = 0x004F4200 },
         { .reg = 0x00009888, .val = 0x024F404C },
         { .reg = 0x00009888, .val = 0x1C4F0000 },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x001B4000 },
         { .reg = 0x00009888, .val = 0x061B8000 },
         { .reg = 0x00009888, .val = 0x081BC000 },
         { .reg = 0x00009888, .val = 0x0A1BC000 },
         { .reg = 0x00009888, .val = 0x0C1BC000 },
         { .reg = 0x00009888, .val = 0x041BC000 },
         { .reg = 0x00009888, .val = 0x001C0031 },
         { .reg = 0x00009888, .val = 0x061C1900 },
         { .reg = 0x00009888, .val = 0x081C1A33 },
         { .reg = 0x00009888, .val = 0x0A1C1B35 },
         { .reg = 0x00009888, .val = 0x0C1C3337 },
         { .reg = 0x00009888, .val = 0x041C31C7 },
         { .reg = 0x00009888, .val = 0x180F5000 },
         { .reg = 0x00009888, .val = 0x1A0FA8AA },
         { .reg = 0x00009888, .val = 0x1C0F0AAA },
         { .reg = 0x00009888, .val = 0x182C8000 },
         { .reg = 0x00009888, .val = 0x1C2C6AAA },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x1A2C2950 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x1993AAAA },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x27904000 },
         { .reg = 0x00009888, .val = 0x29904000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900420 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900400 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x45900001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FC2A },
         { .reg = 0x00002774, .val = 0x0000BF00 },
         { .reg = 0x00002778, .val = 0x0007FC6A },
         { .reg = 0x0000277C, .val = 0x0000BF00 },
         { .reg = 0x00002780, .val = 0x0007FC92 },
         { .reg = 0x00002784, .val = 0x0000BF00 },
         { .reg = 0x00002788, .val = 0x0007FCA2 },
         { .reg = 0x0000278C, .val = 0x0000BF00 },
         { .reg = 0x00002790, .val = 0x0007FC32 },
         { .reg = 0x00002794, .val = 0x0000BF00 },
         { .reg = 0x00002798, .val = 0x0007FC9A },
         { .reg = 0x0000279C, .val = 0x0000BF00 },
         { .reg = 0x000027A0, .val = 0x0007FE6A },
         { .reg = 0x000027A4, .val = 0x0000BF00 },
         { .reg = 0x000027A8, .val = 0x0007FE7A },
         { .reg = 0x000027AC, .val = 0x0000BF00 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00778008 },
         { .reg = 0x0000E45C, .val = 0x00088078 },
         { .reg = 0x0000E55C, .val = 0x00808708 },
         { .reg = 0x0000E65C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 52;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 60;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_untyped_reads0__read;
      counter->name = "EuUntypedReads0";
      counter->desc = "The subslice 0 EU Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_typed_reads0__read;
      counter->name = "EuTypedReads0";
      counter->desc = "The subslice 0 EU Typed Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_untyped_writes0__read;
      counter->name = "EuUntypedWrites0";
      counter->desc = "The subslice 0 EU Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_typed_writes0__read;
      counter->name = "EuTypedWrites0";
      counter->desc = "The subslice 0 EU Typed Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_untyped_atomics0__read;
      counter->name = "EuUntypedAtomics0";
      counter->desc = "The subslice 0 EU Untyped Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuUntypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_typed_atomics0__read;
      counter->name = "EuTypedAtomics0";
      counter->desc = "The subslice 0 EU Typed Atomics subslice 0. Unit: messages.";
      counter->symbol_name = "EuTypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_a64_untyped_reads0__read;
      counter->name = "EuA64UntypedReads0";
      counter->desc = "The subslice 0 EU A64 Untyped Reads subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__eu_a64_untyped_writes0__read;
      counter->name = "EuA64UntypedWrites0";
      counter->desc = "The subslice 0 EU A64 Untyped Writes subslice 0. Unit: messages.";
      counter->symbol_name = "EuA64UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__typed_reads0__read;
      counter->name = "Typed Reads 0";
      counter->desc = "The subslice 0 typed reads. Unit: messages.";
      counter->symbol_name = "TypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__typed_writes0__read;
      counter->name = "Typed Writes 0";
      counter->desc = "The subslice 0 typed writes. Unit: messages.";
      counter->symbol_name = "TypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__untyped_reads0__read;
      counter->name = "Untyped Reads 0";
      counter->desc = "The subslice 0 untyped reads (including SLM reads). Unit: messages.";
      counter->symbol_name = "UntypedReads0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__untyped_writes0__read;
      counter->name = "Untyped Writes 0";
      counter->desc = "The subslice 0 untyped writes (including SLM writes). Unit: messages.";
      counter->symbol_name = "UntypedWrites0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_extended__typed_atomics0__read;
      counter->name = "Typed Atomics 0";
      counter->desc = "The subslice 0 typed atomics. Unit: messages.";
      counter->symbol_name = "TypedAtomics0";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__typed_reads_per_cache_line__read;
      counter->name = "TypedReadsPerCacheLine";
      counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
      counter->symbol_name = "TypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__typed_writes_per_cache_line__read;
      counter->name = "TypedWritesPerCacheLine";
      counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
      counter->symbol_name = "TypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__untyped_writes_per_cache_line__read;
      counter->name = "UntypedWritesPerCacheLine";
      counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
      counter->symbol_name = "UntypedWritesPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_extended__typed_atomics_per_cache_line__read;
      counter->name = "TypedAtomicsPerCacheLine";
      counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
      counter->symbol_name = "TypedAtomicsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "88ec931f-5b4a-453a-9db6-a61232b6143d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 54);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

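   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): the wording above looks stale. In this function the
    * query is allocated with rzalloc() and nothing before the check below
    * assigns query->data_size, so (presuming rzalloc() returns zeroed
    * memory -- confirm against util/ralloc.h) the check is expected to be
    * true on every call rather than acting as a run-once guard. */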

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x166C03B0 },
         { .reg = 0x00009888, .val = 0x1593001E },
         { .reg = 0x00009888, .val = 0x3F900C00 },
         { .reg = 0x00009888, .val = 0x41900000 },
         { .reg = 0x00009888, .val = 0x002D1000 },
         { .reg = 0x00009888, .val = 0x062D4000 },
         { .reg = 0x00009888, .val = 0x082D5000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x0C2E0400 },
         { .reg = 0x00009888, .val = 0x0E2E1500 },
         { .reg = 0x00009888, .val = 0x102E0140 },
         { .reg = 0x00009888, .val = 0x044C4000 },
         { .reg = 0x00009888, .val = 0x0A4C8000 },
         { .reg = 0x00009888, .val = 0x0C4CC000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x004E2000 },
         { .reg = 0x00009888, .val = 0x064E8000 },
         { .reg = 0x00009888, .val = 0x084EA000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x1A4F4001 },
         { .reg = 0x00009888, .val = 0x1C4F5005 },
         { .reg = 0x00009888, .val = 0x006C0051 },
         { .reg = 0x00009888, .val = 0x066C5000 },
         { .reg = 0x00009888, .val = 0x086C5C5D },
         { .reg = 0x00009888, .val = 0x0E6C5E5F },
         { .reg = 0x00009888, .val = 0x106C0000 },
         { .reg = 0x00009888, .val = 0x146C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0000 },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x180F1000 },
         { .reg = 0x00009888, .val = 0x1A0FA800 },
         { .reg = 0x00009888, .val = 0x1C0F0A00 },
         { .reg = 0x00009888, .val = 0x182C4000 },
         { .reg = 0x00009888, .val = 0x1C2C4015 },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x03931980 },
         { .reg = 0x00009888, .val = 0x05930032 },
         { .reg = 0x00009888, .val = 0x11930000 },
         { .reg = 0x00009888, .val = 0x01938000 },
         { .reg = 0x00009888, .val = 0x0F938000 },
         { .reg = 0x00009888, .val = 0x1993A00A },
         { .reg = 0x00009888, .val = 0x07930000 },
         { .reg = 0x00009888, .val = 0x09930000 },
         { .reg = 0x00009888, .val = 0x1D900177 },
         { .reg = 0x00009888, .val = 0x1F900178 },
         { .reg = 0x00009888, .val = 0x35900000 },
         { .reg = 0x00009888, .val = 0x13904000 },
         { .reg = 0x00009888, .val = 0x21904000 },
         { .reg = 0x00009888, .val = 0x23904000 },
         { .reg = 0x00009888, .val = 0x25904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x53901000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x55900111 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x57900000 },
         { .reg = 0x00009888, .val = 0x49900000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x45900400 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002770, .val = 0x0007FFFA },
         { .reg = 0x00002774, .val = 0x0000FEFE },
         { .reg = 0x00002778, .val = 0x0007FFFA },
         { .reg = 0x0000277C, .val = 0x0000FEFD },
         { .reg = 0x00002790, .val = 0x0007FFFA },
         { .reg = 0x00002794, .val = 0x0000FBEF },
         { .reg = 0x00002798, .val = 0x0007FFFA },
         { .reg = 0x0000279C, .val = 0x0000FBDF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "L3 Bank 00 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "L3 Bank 01 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 01. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "L3 Bank 02 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 02. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "L3 Bank 03 Accesses";
         counter->desc = "The total number of accesses to L3 Bank 03. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 328;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank00_ic_accesses__read;
         counter->name = "L3 Bank 00 IC Accesses";
         counter->desc = "The total number of accesses to L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcAccesses";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 336;
      }

      if (perf->sys_vars.slice_mask & 0x01) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = glk__compute_l3_cache__l3_bank00_ic_hits__read;
         counter->name = "L3 Bank 00 IC Hits";
         counter->desc = "The total number of hits in L3 Bank 00 from IC cache. Unit: messages.";
         counter->symbol_name = "L3Bank00IcHits";
         counter->category = "L3/IC";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 344;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "0329ea02-ebb8-43f1-bf89-c5bdcccd3eb4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 39);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x104F0232 },
         { .reg = 0x00009888, .val = 0x124F4640 },
         { .reg = 0x00009888, .val = 0x11834400 },
         { .reg = 0x00009888, .val = 0x022D4000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0E2E0055 },
         { .reg = 0x00009888, .val = 0x064C8000 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x024E8000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x024F6100 },
         { .reg = 0x00009888, .val = 0x044F416B },
         { .reg = 0x00009888, .val = 0x064F004B },
         { .reg = 0x00009888, .val = 0x1A4F0000 },
         { .reg = 0x00009888, .val = 0x1A0F02A8 },
         { .reg = 0x00009888, .val = 0x1A2C5500 },
         { .reg = 0x00009888, .val = 0x0F808000 },
         { .reg = 0x00009888, .val = 0x25810020 },
         { .reg = 0x00009888, .val = 0x0F8305C0 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x1F951000 },
         { .reg = 0x00009888, .val = 0x13920200 },
         { .reg = 0x00009888, .val = 0x31908000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4D900003 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x45900000 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FDFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss0)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "HDC stalled by L3 (s0.ss1)";
         counter->desc = "Percentage of time when HDC has messges to L3, but it's stalled due to lack of credits (s0.ss1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__hdc_and_sf__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "5e57a25a-1d18-4e94-b84b-08ea66751b8c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x12643400 },
         { .reg = 0x00009888, .val = 0x12653400 },
         { .reg = 0x00009888, .val = 0x106C6800 },
         { .reg = 0x00009888, .val = 0x126C001E },
         { .reg = 0x00009888, .val = 0x166C0010 },
         { .reg = 0x00009888, .val = 0x0C2D5000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x102E0154 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E0055 },
         { .reg = 0x00009888, .val = 0x104C8000 },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x084CC000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x0C4EA000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x044EA000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x1C4F5500 },
         { .reg = 0x00009888, .val = 0x1A4F1554 },
         { .reg = 0x00009888, .val = 0x0A640024 },
         { .reg = 0x00009888, .val = 0x10640000 },
         { .reg = 0x00009888, .val = 0x04640000 },
         { .reg = 0x00009888, .val = 0x0C650024 },
         { .reg = 0x00009888, .val = 0x10650000 },
         { .reg = 0x00009888, .val = 0x06650000 },
         { .reg = 0x00009888, .val = 0x0C6C5327 },
         { .reg = 0x00009888, .val = 0x0E6C5425 },
         { .reg = 0x00009888, .val = 0x006C2A00 },
         { .reg = 0x00009888, .val = 0x026C285B },
         { .reg = 0x00009888, .val = 0x046C005C },
         { .reg = 0x00009888, .val = 0x1C6C0000 },
         { .reg = 0x00009888, .val = 0x1A6C0900 },
         { .reg = 0x00009888, .val = 0x1C0F0AA0 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F02AA },
         { .reg = 0x00009888, .val = 0x1C2C5400 },
         { .reg = 0x00009888, .val = 0x1E2C0001 },
         { .reg = 0x00009888, .val = 0x1A2C5550 },
         { .reg = 0x00009888, .val = 0x1993AA00 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x2B904000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900421 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900420 },
         { .reg = 0x00009888, .val = 0x45900021 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00100070 },
         { .reg = 0x00002774, .val = 0x0000FFF1 },
         { .reg = 0x00002778, .val = 0x00014002 },
         { .reg = 0x0000277C, .val = 0x0000C3FF },
         { .reg = 0x00002780, .val = 0x00010002 },
         { .reg = 0x00002784, .val = 0x0000C7FF },
         { .reg = 0x00002788, .val = 0x00004002 },
         { .reg = 0x0000278C, .val = 0x0000D3FF },
         { .reg = 0x00002790, .val = 0x00100700 },
         { .reg = 0x00002794, .val = 0x0000FF1F },
         { .reg = 0x00002798, .val = 0x00001402 },
         { .reg = 0x0000279C, .val = 0x0000FC3F },
         { .reg = 0x000027A0, .val = 0x00001002 },
         { .reg = 0x000027A4, .val = 0x0000FC7F },
         { .reg = 0x000027A8, .val = 0x00000402 },
         { .reg = 0x000027AC, .val = 0x0000FD3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__l3_1__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__l3_1__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__l3_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "f0652373-d361-426f-b36d-9ffec288bdc6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

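   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query once and
    * only fill it in when it has not been set up yet (data_size == 0).
    *
    * NOTE(review): the original wording referred to a "global variable",
    * which looks stale; here the query is allocated above with rzalloc(),
    * so data_size is presumably always 0 at this point and the guard below
    * always passes -- confirm against the generator (gen_perf.py). */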

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x102D7800 },
         { .reg = 0x00009888, .val = 0x122D79E0 },
         { .reg = 0x00009888, .val = 0x0C2F0004 },
         { .reg = 0x00009888, .val = 0x100E3800 },
         { .reg = 0x00009888, .val = 0x180F0005 },
         { .reg = 0x00009888, .val = 0x002D0940 },
         { .reg = 0x00009888, .val = 0x022D802F },
         { .reg = 0x00009888, .val = 0x042D4013 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0E2E0050 },
         { .reg = 0x00009888, .val = 0x022F0010 },
         { .reg = 0x00009888, .val = 0x002F0000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x044E8000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x040E0480 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x060F0027 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x1A0F0040 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x439014A0 },
         { .reg = 0x00009888, .val = 0x459000A4 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x30800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000EFFF },
         { .reg = 0x00002778, .val = 0x00006000 },
         { .reg = 0x0000277C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__pixel_data0_ready__read;
         counter->name = "Slice0 Post-EarlyZ Pixel Data Ready";
         counter->desc = "The percentage of time in which slice0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
         counter->symbol_name = "PixelData0Ready";
         counter->category = "GPU/Rasterizer/Early Depth Test";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__ps_output0_available__read;
         counter->name = "Slice0 PS Output Available";
         counter->desc = "The percentage of time in which slice0 PS output is available Unit: percent.";
         counter->symbol_name = "PSOutput0Available";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__pixel_values0_ready__read;
         counter->name = "Slice0 Pixel Values Ready";
         counter->desc = "The percentage of time in which slice0 pixel values are ready Unit: percent.";
         counter->symbol_name = "PixelValues0Ready";
         counter->category = "GPU/3D Pipe";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__rasterizer_and_pixel_backend__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_sampler_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler";
   query->symbol_name = "Sampler";
   query->guid = "6ac97d12-ce13-428a-b20c-6902bde2608b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x121300A0 },
         { .reg = 0x00009888, .val = 0x141600AB },
         { .reg = 0x00009888, .val = 0x123300A0 },
         { .reg = 0x00009888, .val = 0x143600AB },
         { .reg = 0x00009888, .val = 0x125300A0 },
         { .reg = 0x00009888, .val = 0x145600AB },
         { .reg = 0x00009888, .val = 0x0C2D4000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x102E01A0 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E0065 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x084C4000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x0E4E8000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x044E2000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x1C0F0800 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F023F },
         { .reg = 0x00009888, .val = 0x1E2C0003 },
         { .reg = 0x00009888, .val = 0x1A2CC030 },
         { .reg = 0x00009888, .val = 0x04132180 },
         { .reg = 0x00009888, .val = 0x02130000 },
         { .reg = 0x00009888, .val = 0x0C148000 },
         { .reg = 0x00009888, .val = 0x0E142000 },
         { .reg = 0x00009888, .val = 0x04148000 },
         { .reg = 0x00009888, .val = 0x1E150140 },
         { .reg = 0x00009888, .val = 0x1C150040 },
         { .reg = 0x00009888, .val = 0x0C163000 },
         { .reg = 0x00009888, .val = 0x0E160068 },
         { .reg = 0x00009888, .val = 0x10160000 },
         { .reg = 0x00009888, .val = 0x18160000 },
         { .reg = 0x00009888, .val = 0x0A164000 },
         { .reg = 0x00009888, .val = 0x04330043 },
         { .reg = 0x00009888, .val = 0x02330000 },
         { .reg = 0x00009888, .val = 0x0234A000 },
         { .reg = 0x00009888, .val = 0x04342000 },
         { .reg = 0x00009888, .val = 0x1C350015 },
         { .reg = 0x00009888, .val = 0x02363460 },
         { .reg = 0x00009888, .val = 0x10360000 },
         { .reg = 0x00009888, .val = 0x04360000 },
         { .reg = 0x00009888, .val = 0x06360000 },
         { .reg = 0x00009888, .val = 0x08364000 },
         { .reg = 0x00009888, .val = 0x06530043 },
         { .reg = 0x00009888, .val = 0x02530000 },
         { .reg = 0x00009888, .val = 0x0E548000 },
         { .reg = 0x00009888, .val = 0x00548000 },
         { .reg = 0x00009888, .val = 0x06542000 },
         { .reg = 0x00009888, .val = 0x1E550400 },
         { .reg = 0x00009888, .val = 0x1A552000 },
         { .reg = 0x00009888, .val = 0x1C550100 },
         { .reg = 0x00009888, .val = 0x0E563000 },
         { .reg = 0x00009888, .val = 0x00563400 },
         { .reg = 0x00009888, .val = 0x10560000 },
         { .reg = 0x00009888, .val = 0x18560000 },
         { .reg = 0x00009888, .val = 0x02560000 },
         { .reg = 0x00009888, .val = 0x0C564000 },
         { .reg = 0x00009888, .val = 0x1993A800 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B9014A0 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900001 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900820 },
         { .reg = 0x00009888, .val = 0x45901022 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0000C000 },
         { .reg = 0x00002774, .val = 0x0000E7FF },
         { .reg = 0x00002778, .val = 0x00003000 },
         { .reg = 0x0000277C, .val = 0x0000F9FF },
         { .reg = 0x00002780, .val = 0x00000C00 },
         { .reg = 0x00002784, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__sampler__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__sampler__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__sampler__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__sampler__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "59ea703a-9a35-4aed-a985-0d9ab7aceaba";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

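   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to describe a query once and
    * reuse that description.
    *
    * NOTE(review): the wording about a "global variable" looks stale; here
    * `query` is freshly allocated with rzalloc() above, so the data_size
    * check below presumably always passes. Confirm against gen_perf.py. */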

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A0000 },
         { .reg = 0x00009888, .val = 0x143A0000 },
         { .reg = 0x00009888, .val = 0x145A0000 },
         { .reg = 0x00009888, .val = 0x0C2D4000 },
         { .reg = 0x00009888, .val = 0x0E2D5000 },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x102E0150 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E006A },
         { .reg = 0x00009888, .val = 0x124C8000 },
         { .reg = 0x00009888, .val = 0x144C8000 },
         { .reg = 0x00009888, .val = 0x164C2000 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064C4000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x0C4E8000 },
         { .reg = 0x00009888, .val = 0x0E4EA000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024E2000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x1C0F0BC0 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F0302 },
         { .reg = 0x00009888, .val = 0x1E2C0003 },
         { .reg = 0x00009888, .val = 0x1A2C00F0 },
         { .reg = 0x00009888, .val = 0x021A3080 },
         { .reg = 0x00009888, .val = 0x041A31E5 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x0414A000 },
         { .reg = 0x00009888, .val = 0x1C150054 },
         { .reg = 0x00009888, .val = 0x06168000 },
         { .reg = 0x00009888, .val = 0x08168000 },
         { .reg = 0x00009888, .val = 0x0A168000 },
         { .reg = 0x00009888, .val = 0x0C3A3280 },
         { .reg = 0x00009888, .val = 0x0E3A0063 },
         { .reg = 0x00009888, .val = 0x063A0061 },
         { .reg = 0x00009888, .val = 0x023A0000 },
         { .reg = 0x00009888, .val = 0x0C348000 },
         { .reg = 0x00009888, .val = 0x0E342000 },
         { .reg = 0x00009888, .val = 0x06342000 },
         { .reg = 0x00009888, .val = 0x1E350140 },
         { .reg = 0x00009888, .val = 0x1C350100 },
         { .reg = 0x00009888, .val = 0x18360028 },
         { .reg = 0x00009888, .val = 0x0C368000 },
         { .reg = 0x00009888, .val = 0x0E5A3080 },
         { .reg = 0x00009888, .val = 0x005A3280 },
         { .reg = 0x00009888, .val = 0x025A0063 },
         { .reg = 0x00009888, .val = 0x0E548000 },
         { .reg = 0x00009888, .val = 0x00548000 },
         { .reg = 0x00009888, .val = 0x02542000 },
         { .reg = 0x00009888, .val = 0x1E550400 },
         { .reg = 0x00009888, .val = 0x1A552000 },
         { .reg = 0x00009888, .val = 0x1C550001 },
         { .reg = 0x00009888, .val = 0x18560080 },
         { .reg = 0x00009888, .val = 0x02568000 },
         { .reg = 0x00009888, .val = 0x04568000 },
         { .reg = 0x00009888, .val = 0x1993A800 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x2D904000 },
         { .reg = 0x00009888, .val = 0x2F904000 },
         { .reg = 0x00009888, .val = 0x31904000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x59900000 },
         { .reg = 0x00009888, .val = 0x4B900420 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x33900000 },
         { .reg = 0x00009888, .val = 0x4D900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900000 },
         { .reg = 0x00009888, .val = 0x45901084 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x30800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x00007FFF },
         { .reg = 0x00002778, .val = 0x00000000 },
         { .reg = 0x0000277C, .val = 0x00009FFF },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000EFFF },
         { .reg = 0x00002788, .val = 0x00000000 },
         { .reg = 0x0000278C, .val = 0x0000F3FF },
         { .reg = 0x00002790, .val = 0x00000002 },
         { .reg = 0x00002794, .val = 0x0000FDFF },
         { .reg = 0x00002798, .val = 0x00000000 },
         { .reg = 0x0000279C, .val = 0x0000FE7F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice0";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "NonPS Thread Ready For Dispatch on Slice0 Subslice2";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_1__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
glk_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "f2d6d718-e7b1-48f4-9624-45230d2fe2a0";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 42);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x000091BC, .val = 0xE0500000 },
         { .reg = 0x00009840, .val = 0x00000080 },
         { .reg = 0x00009888, .val = 0x141A026B },
         { .reg = 0x00009888, .val = 0x143A0173 },
         { .reg = 0x00009888, .val = 0x145A026B },
         { .reg = 0x00009888, .val = 0x002D4000 },
         { .reg = 0x00009888, .val = 0x022D5000 },
         { .reg = 0x00009888, .val = 0x042D5000 },
         { .reg = 0x00009888, .val = 0x062D1000 },
         { .reg = 0x00009888, .val = 0x0C2E5000 },
         { .reg = 0x00009888, .val = 0x0E2E0069 },
         { .reg = 0x00009888, .val = 0x044C8000 },
         { .reg = 0x00009888, .val = 0x064CC000 },
         { .reg = 0x00009888, .val = 0x0A4C4000 },
         { .reg = 0x00009888, .val = 0x004E8000 },
         { .reg = 0x00009888, .val = 0x024EA000 },
         { .reg = 0x00009888, .val = 0x064E2000 },
         { .reg = 0x00009888, .val = 0x180F6000 },
         { .reg = 0x00009888, .val = 0x1A0F030A },
         { .reg = 0x00009888, .val = 0x1A2C03C0 },
         { .reg = 0x00009888, .val = 0x041A37E7 },
         { .reg = 0x00009888, .val = 0x021A0000 },
         { .reg = 0x00009888, .val = 0x0414A000 },
         { .reg = 0x00009888, .val = 0x1C150050 },
         { .reg = 0x00009888, .val = 0x08168000 },
         { .reg = 0x00009888, .val = 0x0A168000 },
         { .reg = 0x00009888, .val = 0x003A3380 },
         { .reg = 0x00009888, .val = 0x063A006F },
         { .reg = 0x00009888, .val = 0x023A0000 },
         { .reg = 0x00009888, .val = 0x00348000 },
         { .reg = 0x00009888, .val = 0x06342000 },
         { .reg = 0x00009888, .val = 0x1A352000 },
         { .reg = 0x00009888, .val = 0x1C350100 },
         { .reg = 0x00009888, .val = 0x02368000 },
         { .reg = 0x00009888, .val = 0x0C368000 },
         { .reg = 0x00009888, .val = 0x025A37E7 },
         { .reg = 0x00009888, .val = 0x0254A000 },
         { .reg = 0x00009888, .val = 0x1C550005 },
         { .reg = 0x00009888, .val = 0x04568000 },
         { .reg = 0x00009888, .val = 0x06568000 },
         { .reg = 0x00009888, .val = 0x03938000 },
         { .reg = 0x00009888, .val = 0x05938000 },
         { .reg = 0x00009888, .val = 0x07938000 },
         { .reg = 0x00009888, .val = 0x09938000 },
         { .reg = 0x00009888, .val = 0x0B938000 },
         { .reg = 0x00009888, .val = 0x0D938000 },
         { .reg = 0x00009888, .val = 0x15904000 },
         { .reg = 0x00009888, .val = 0x17904000 },
         { .reg = 0x00009888, .val = 0x19904000 },
         { .reg = 0x00009888, .val = 0x1B904000 },
         { .reg = 0x00009888, .val = 0x1D904000 },
         { .reg = 0x00009888, .val = 0x1F904000 },
         { .reg = 0x00009888, .val = 0x37900000 },
         { .reg = 0x00009888, .val = 0x53900000 },
         { .reg = 0x00009888, .val = 0x43900020 },
         { .reg = 0x00009888, .val = 0x45901080 },
         { .reg = 0x00009888, .val = 0x55900000 },
         { .reg = 0x00009888, .val = 0x47900001 },
         { .reg = 0x00009888, .val = 0x33900000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002744, .val = 0x00800000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = glk__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = glk__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = glk__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 0x1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 0x4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 0x2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (true &&
          perf->sys_vars.query_mode) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = glk__tdl_2__gt_request_queue_full__read;
         counter->name = "SQ is full";
         counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
         counter->symbol_name = "GTRequestQueueFull";
         counter->category = "GTI";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Allocate and describe the "Compute Metrics Extra set" OA query
 * (symbol "ComputeExtra") and publish it in perf->oa_metrics_table
 * under its GUID.
 *
 * The query and its counter array are ralloc'ed (the query is parented to
 * perf, the counters to the query).  Five counters are described, matching
 * the five slots allocated for them.
 */
static void
glk_register_compute_extra_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced by query->config.  They have
    * static storage duration, so the pointers stored below stay valid after
    * this function returns. */
   static const struct intel_perf_query_register_prog compute_extra_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x141A001F },
      { .reg = 0x00009888, .val = 0x143A001F },
      { .reg = 0x00009888, .val = 0x145A001F },
      { .reg = 0x00009888, .val = 0x042D5000 },
      { .reg = 0x00009888, .val = 0x062D1000 },
      { .reg = 0x00009888, .val = 0x0E2E0094 },
      { .reg = 0x00009888, .val = 0x084CC000 },
      { .reg = 0x00009888, .val = 0x044EA000 },
      { .reg = 0x00009888, .val = 0x1A0F00E0 },
      { .reg = 0x00009888, .val = 0x1A2C0C00 },
      { .reg = 0x00009888, .val = 0x061A0063 },
      { .reg = 0x00009888, .val = 0x021A0000 },
      { .reg = 0x00009888, .val = 0x06142000 },
      { .reg = 0x00009888, .val = 0x1C150100 },
      { .reg = 0x00009888, .val = 0x0C168000 },
      { .reg = 0x00009888, .val = 0x043A3180 },
      { .reg = 0x00009888, .val = 0x023A0000 },
      { .reg = 0x00009888, .val = 0x04348000 },
      { .reg = 0x00009888, .val = 0x1C350040 },
      { .reg = 0x00009888, .val = 0x0A368000 },
      { .reg = 0x00009888, .val = 0x045A0063 },
      { .reg = 0x00009888, .val = 0x025A0000 },
      { .reg = 0x00009888, .val = 0x04542000 },
      { .reg = 0x00009888, .val = 0x1C550010 },
      { .reg = 0x00009888, .val = 0x08568000 },
      { .reg = 0x00009888, .val = 0x09938000 },
      { .reg = 0x00009888, .val = 0x0B938000 },
      { .reg = 0x00009888, .val = 0x0D938000 },
      { .reg = 0x00009888, .val = 0x1B904000 },
      { .reg = 0x00009888, .val = 0x1D904000 },
      { .reg = 0x00009888, .val = 0x1F904000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x45900400 },
      { .reg = 0x00009888, .val = 0x47900004 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };
   static const struct intel_perf_query_register_prog compute_extra_b_counter[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };
   static const struct intel_perf_query_register_prog compute_extra_flex[] = {
      { .reg = 0x0000E458, .val = 0x00001000 },
      { .reg = 0x0000E558, .val = 0x00003002 },
      { .reg = 0x0000E658, .val = 0x00005004 },
      { .reg = 0x0000E758, .val = 0x00011010 },
      { .reg = 0x0000E45C, .val = 0x00050012 },
      { .reg = 0x0000E55C, .val = 0x00052051 },
      { .reg = 0x0000E65C, .val = 0x00000008 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Extra set";
   query->symbol_name = "ComputeExtra";
   query->guid = "15274c82-27d2-4819-876a-7cb1a2c59ba4";

   /* Storage for the five counters described below; n_counters is bumped
    * as each one is filled in. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 5);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: each section starts where the previous
    * one ends (1 GPU time slot, 1 GPU clock slot, 36 A, 8 B, 8 C, 2 perfcnt). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* NOTE(review): query was just allocated with rzalloc, so data_size is
    * presumably still zero and this branch always runs - TODO confirm. */
   if (!query->data_size) {
      struct intel_perf_query_counter *c;

      query->config.mux_regs = compute_extra_mux;
      query->config.n_mux_regs = ARRAY_SIZE(compute_extra_mux);

      query->config.b_counter_regs = compute_extra_b_counter;
      query->config.n_b_counter_regs = ARRAY_SIZE(compute_extra_b_counter);

      query->config.flex_regs = compute_extra_flex;
      query->config.n_flex_regs = ARRAY_SIZE(compute_extra_flex);

      /* GpuTime: uint64 at byte offset 0. */
      c = query->counters + query->n_counters++;
      c->name = "GPU Time Elapsed";
      c->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      c->symbol_name = "GpuTime";
      c->category = "GPU";
      c->type = INTEL_PERF_COUNTER_TYPE_RAW;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->units = INTEL_PERF_COUNTER_UNITS_NS;
      c->oa_counter_read_uint64 = glk__compute_extra__gpu_time__read;
      c->raw_max = 0 /* undefined */;
      c->offset = 0;

      /* GpuCoreClocks: uint64 at byte offset 8. */
      c = query->counters + query->n_counters++;
      c->name = "GPU Core Clocks";
      c->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      c->symbol_name = "GpuCoreClocks";
      c->category = "GPU";
      c->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      c->oa_counter_read_uint64 = glk__compute_extra__gpu_core_clocks__read;
      c->raw_max = 0 /* undefined */;
      c->offset = 8;

      /* AvgGpuCoreFrequency: uint64 at byte offset 16; its maximum is
       * obtained from the generated __max helper. */
      c = query->counters + query->n_counters++;
      c->name = "AVG GPU Core Frequency";
      c->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      c->symbol_name = "AvgGpuCoreFrequency";
      c->category = "GPU";
      c->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      c->units = INTEL_PERF_COUNTER_UNITS_HZ;
      c->oa_counter_read_uint64 = glk__compute_extra__avg_gpu_core_frequency__read;
      c->raw_max = glk__compute_extra__avg_gpu_core_frequency__max(perf);
      c->offset = 16;

      /* Fpu1Active: float percentage at byte offset 24. */
      c = query->counters + query->n_counters++;
      c->name = "EU FPU1 Pipe Active";
      c->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      c->symbol_name = "Fpu1Active";
      c->category = "EU Array/Pipes";
      c->type = INTEL_PERF_COUNTER_TYPE_RAW;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      c->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      c->oa_counter_read_float = glk__compute_extra__fpu1_active__read;
      c->raw_max = 100.0;
      c->offset = 24;

      /* Fpu1ActiveAdjusted: float percentage at byte offset 28. */
      c = query->counters + query->n_counters++;
      c->name = "EU FPU1 Pipe Active including Ext Math";
      c->desc = "The percentage of time in which EU FPU1 pipeline was actively processing including Extended Math processing Unit: percent.";
      c->symbol_name = "Fpu1ActiveAdjusted";
      c->category = "EU Array/Pipes";
      c->type = INTEL_PERF_COUNTER_TYPE_RAW;
      c->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      c->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      c->oa_counter_read_float = glk__compute_extra__fpu1_active_adjusted__read;
      c->raw_max = 100.0;
      c->offset = 28;

      /* Total result size: end of the last counter that was described. */
      query->data_size = c->offset + intel_perf_query_counter_get_size(c);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the "MDAPI testing set" (symbol "TestOa") OA metric set for GLK.
 *
 * A new intel_perf_query_info is allocated with \p perf as its allocation
 * context, filled with the OA report format, the accumulator offsets, the
 * MUX and b-counter register tables and twelve counter descriptions, and is
 * finally inserted into perf->oa_metrics_table keyed by the set's GUID.
 */
static void
glk_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* MUX register/value pairs for this metric set. */
   static const struct intel_perf_query_register_prog test_oa_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x19800000 },
      { .reg = 0x00009888, .val = 0x07800063 },
      { .reg = 0x00009888, .val = 0x11800000 },
      { .reg = 0x00009888, .val = 0x23810008 },
      { .reg = 0x00009888, .val = 0x1D950400 },
      { .reg = 0x00009888, .val = 0x0F922000 },
      { .reg = 0x00009888, .val = 0x1F908000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   /* b-counter register/value pairs for this metric set. */
   static const struct intel_perf_query_register_prog test_oa_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002744, .val = 0x00800000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002770, .val = 0x00000004 },
      { .reg = 0x00002774, .val = 0x00000000 },
      { .reg = 0x00002778, .val = 0x00000003 },
      { .reg = 0x0000277C, .val = 0x00000000 },
      { .reg = 0x00002780, .val = 0x00000007 },
      { .reg = 0x00002784, .val = 0x00000000 },
      { .reg = 0x00002788, .val = 0x00100002 },
      { .reg = 0x0000278C, .val = 0x0000FFF7 },
      { .reg = 0x00002790, .val = 0x00100002 },
      { .reg = 0x00002794, .val = 0x0000FFCF },
      { .reg = 0x00002798, .val = 0x00100082 },
      { .reg = 0x0000279C, .val = 0x0000FFEF },
      { .reg = 0x000027A0, .val = 0x001000C2 },
      { .reg = 0x000027A4, .val = 0x0000FFE7 },
      { .reg = 0x000027A8, .val = 0x00100001 },
      { .reg = 0x000027AC, .val = 0x0000FFE7 },
   };

   struct intel_perf_query_info *oa_query =
      rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   oa_query->perf = perf;
   oa_query->kind = INTEL_PERF_QUERY_TYPE_OA;
   oa_query->name = "MDAPI testing set";
   oa_query->symbol_name = "TestOa";
   oa_query->guid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";

   /* Room for the twelve counters described below. */
   oa_query->counters =
      rzalloc_array(oa_query, struct intel_perf_query_counter, 12);
   oa_query->n_counters = 0;

   oa_query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   oa_query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends. */
   oa_query->gpu_time_offset = 0;
   oa_query->gpu_clock_offset = oa_query->gpu_time_offset + 1;
   oa_query->a_offset = oa_query->gpu_clock_offset + 1;
   oa_query->b_offset = oa_query->a_offset + 36;
   oa_query->c_offset = oa_query->b_offset + 8;
   oa_query->perfcnt_offset = oa_query->c_offset + 8;
   oa_query->rpstat_offset = oa_query->perfcnt_offset + 2;

   struct intel_perf_query_counter *slot = oa_query->counters;

   /* NOTE(review): the query was allocated just above with rzalloc, which
    * presumably zero-fills, so data_size should still be 0 here and this
    * body is expected to run on every call. The guard is kept as-is. */
   if (!oa_query->data_size) {
      oa_query->config.mux_regs = test_oa_mux;
      oa_query->config.n_mux_regs = ARRAY_SIZE(test_oa_mux);

      oa_query->config.b_counter_regs = test_oa_b_counters;
      oa_query->config.n_b_counter_regs = ARRAY_SIZE(test_oa_b_counters);

      /* GpuTime */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "GpuTime";
      slot->name = "GPU Time Elapsed";
      slot->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_RAW;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_NS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 0;
      slot->oa_counter_read_uint64 = glk__test_oa__gpu_time__read;

      /* GpuCoreClocks */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "GpuCoreClocks";
      slot->name = "GPU Core Clocks";
      slot->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 8;
      slot->oa_counter_read_uint64 = glk__test_oa__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here whose raw_max is
       * computed from perf rather than being a constant. */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "AvgGpuCoreFrequency";
      slot->name = "AVG GPU Core Frequency";
      slot->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_HZ;
      slot->raw_max = glk__test_oa__avg_gpu_core_frequency__max(perf);
      slot->offset = 16;
      slot->oa_counter_read_uint64 = glk__test_oa__avg_gpu_core_frequency__read;

      /* Counter0 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter0";
      slot->name = "TestCounter0";
      slot->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 24;
      slot->oa_counter_read_uint64 = glk__test_oa__counter0__read;

      /* Counter1 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter1";
      slot->name = "TestCounter1";
      slot->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 32;
      slot->oa_counter_read_uint64 = glk__test_oa__counter1__read;

      /* Counter2 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter2";
      slot->name = "TestCounter2";
      slot->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 40;
      slot->oa_counter_read_uint64 = glk__test_oa__counter2__read;

      /* Counter3 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter3";
      slot->name = "TestCounter3";
      slot->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 48;
      slot->oa_counter_read_uint64 = glk__test_oa__counter3__read;

      /* Counter4 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter4";
      slot->name = "TestCounter4";
      slot->desc = "HW test counter 4. Factor: 0.333 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 56;
      slot->oa_counter_read_uint64 = glk__test_oa__counter4__read;

      /* Counter5 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter5";
      slot->name = "TestCounter5";
      slot->desc = "HW test counter 5. Factor: 0.333 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 64;
      slot->oa_counter_read_uint64 = glk__test_oa__counter5__read;

      /* Counter6 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter6";
      slot->name = "TestCounter6";
      slot->desc = "HW test counter 6. Factor: 0.166 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 72;
      slot->oa_counter_read_uint64 = glk__test_oa__counter6__read;

      /* Counter7 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter7";
      slot->name = "TestCounter7";
      slot->desc = "HW test counter 7. Factor: 0.666 Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 80;
      slot->oa_counter_read_uint64 = glk__test_oa__counter7__read;

      /* Counter8 */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "Counter8";
      slot->name = "TestCounter8";
      slot->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 88;
      slot->oa_counter_read_uint64 = glk__test_oa__counter8__read;

      /* slot is the last counter registered: the total size is its offset
       * plus its own size. */
      oa_query->data_size =
         slot->offset + intel_perf_query_counter_get_size(slot);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, oa_query->guid, oa_query);
}


/**
 * Register the "Metric set PMA Stall" (symbol "PMA_Stall") OA metric set
 * for GLK.
 *
 * A new intel_perf_query_info is allocated with \p perf as its allocation
 * context and filled with the OA report format, accumulator offsets, MUX
 * and b-counter register tables and up to four counter descriptions. The
 * fourth counter (StcPMAStall) is only added when bit 0 of
 * perf->sys_vars.slice_mask is set. The query is then inserted into
 * perf->oa_metrics_table keyed by the set's GUID.
 */
static void
glk_register_pma__stall_counter_query(struct intel_perf_config *perf)
{
   /* MUX register/value pairs for this metric set. */
   static const struct intel_perf_query_register_prog pma_stall_mux[] = {
      { .reg = 0x00009840, .val = 0x00000080 },
      { .reg = 0x00009888, .val = 0x124C3080 },
      { .reg = 0x00009888, .val = 0x002D1000 },
      { .reg = 0x00009888, .val = 0x062D4000 },
      { .reg = 0x00009888, .val = 0x082D5000 },
      { .reg = 0x00009888, .val = 0x0A2D5000 },
      { .reg = 0x00009888, .val = 0x0C2E0400 },
      { .reg = 0x00009888, .val = 0x0E2E5500 },
      { .reg = 0x00009888, .val = 0x102E0001 },
      { .reg = 0x00009888, .val = 0x004C0045 },
      { .reg = 0x00009888, .val = 0x064C2300 },
      { .reg = 0x00009888, .val = 0x084C26C4 },
      { .reg = 0x00009888, .val = 0x0A4C264E },
      { .reg = 0x00009888, .val = 0x164C0000 },
      { .reg = 0x00009888, .val = 0x044C0000 },
      { .reg = 0x00009888, .val = 0x0C4C0000 },
      { .reg = 0x00009888, .val = 0x0E4C0000 },
      { .reg = 0x00009888, .val = 0x01938000 },
      { .reg = 0x00009888, .val = 0x0F938000 },
      { .reg = 0x00009888, .val = 0x199300AA },
      { .reg = 0x00009888, .val = 0x13904000 },
      { .reg = 0x00009888, .val = 0x21904000 },
      { .reg = 0x00009888, .val = 0x23904000 },
      { .reg = 0x00009888, .val = 0x25904000 },
      { .reg = 0x00009888, .val = 0x27904000 },
      { .reg = 0x00009888, .val = 0x29904000 },
      { .reg = 0x00009888, .val = 0x53900000 },
      { .reg = 0x00009888, .val = 0x43900000 },
      { .reg = 0x00009888, .val = 0x55900000 },
      { .reg = 0x00009888, .val = 0x47900000 },
      { .reg = 0x00009888, .val = 0x57900000 },
      { .reg = 0x00009888, .val = 0x49900000 },
      { .reg = 0x00009888, .val = 0x37900000 },
      { .reg = 0x00009888, .val = 0x33900000 },
   };

   /* b-counter register/value pairs for this metric set. */
   static const struct intel_perf_query_register_prog pma_stall_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x30800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00E00021 },
      { .reg = 0x00002774, .val = 0x0007FFF8 },
      { .reg = 0x00002778, .val = 0x07000101 },
      { .reg = 0x0000277C, .val = 0x0038FFC7 },
   };

   struct intel_perf_query_info *oa_query =
      rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   oa_query->perf = perf;
   oa_query->kind = INTEL_PERF_QUERY_TYPE_OA;
   oa_query->name = "Metric set PMA Stall";
   oa_query->symbol_name = "PMA_Stall";
   oa_query->guid = "e6868953-fb47-431d-a060-f785916558fc";

   /* Room for the three unconditional counters plus the optional one. */
   oa_query->counters =
      rzalloc_array(oa_query, struct intel_perf_query_counter, 4);
   oa_query->n_counters = 0;

   oa_query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   oa_query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends. */
   oa_query->gpu_time_offset = 0;
   oa_query->gpu_clock_offset = oa_query->gpu_time_offset + 1;
   oa_query->a_offset = oa_query->gpu_clock_offset + 1;
   oa_query->b_offset = oa_query->a_offset + 36;
   oa_query->c_offset = oa_query->b_offset + 8;
   oa_query->perfcnt_offset = oa_query->c_offset + 8;
   oa_query->rpstat_offset = oa_query->perfcnt_offset + 2;

   struct intel_perf_query_counter *slot = oa_query->counters;

   /* NOTE(review): the query was allocated just above with rzalloc, which
    * presumably zero-fills, so data_size should still be 0 here and this
    * body is expected to run on every call. The guard is kept as-is. */
   if (!oa_query->data_size) {
      oa_query->config.mux_regs = pma_stall_mux;
      oa_query->config.n_mux_regs = ARRAY_SIZE(pma_stall_mux);

      oa_query->config.b_counter_regs = pma_stall_b_counters;
      oa_query->config.n_b_counter_regs = ARRAY_SIZE(pma_stall_b_counters);

      /* GpuTime */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "GpuTime";
      slot->name = "GPU Time Elapsed";
      slot->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_RAW;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_NS;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 0;
      slot->oa_counter_read_uint64 = glk__pma__stall__gpu_time__read;

      /* GpuCoreClocks */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "GpuCoreClocks";
      slot->name = "GPU Core Clocks";
      slot->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      slot->raw_max = 0 /* undefined */;
      slot->offset = 8;
      slot->oa_counter_read_uint64 = glk__pma__stall__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: raw_max is computed from perf rather than
       * being a constant. */
      slot = oa_query->counters + oa_query->n_counters;
      oa_query->n_counters++;
      slot->symbol_name = "AvgGpuCoreFrequency";
      slot->name = "AVG GPU Core Frequency";
      slot->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      slot->category = "GPU";
      slot->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      slot->units = INTEL_PERF_COUNTER_UNITS_HZ;
      slot->raw_max = glk__pma__stall__avg_gpu_core_frequency__max(perf);
      slot->offset = 16;
      slot->oa_counter_read_uint64 = glk__pma__stall__avg_gpu_core_frequency__read;

      /* StcPMAStall is only exposed when bit 0 of the slice mask is set. */
      if ((perf->sys_vars.slice_mask & 1) != 0) {
         slot = oa_query->counters + oa_query->n_counters;
         oa_query->n_counters++;
         slot->symbol_name = "StcPMAStall";
         slot->name = "STC PMA stall";
         slot->desc = "Percentage of time when stencil cache line and an overlapping pixel are causing stalls Unit: percent.";
         slot->category = "GPU/Stencil Cache";
         slot->type = INTEL_PERF_COUNTER_TYPE_RAW;
         slot->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         slot->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         slot->raw_max = 100.0;
         slot->offset = 24;
         slot->oa_counter_read_float = glk__pma__stall__stc_pma_stall__read;
      }

      /* slot is whichever counter was registered last (AvgGpuCoreFrequency
       * when the conditional counter above was skipped): the total size is
       * its offset plus its own size. */
      oa_query->data_size =
         slot->offset + intel_perf_query_counter_get_size(slot);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, oa_query->guid, oa_query);
}

void
intel_oa_register_queries_glk(struct intel_perf_config *perf)
{
   glk_register_render_basic_counter_query(perf);
   glk_register_compute_basic_counter_query(perf);
   glk_register_render_pipe_profile_counter_query(perf);
   glk_register_memory_reads_counter_query(perf);
   glk_register_memory_writes_counter_query(perf);
   glk_register_compute_extended_counter_query(perf);
   glk_register_compute_l3_cache_counter_query(perf);
   glk_register_hdc_and_sf_counter_query(perf);
   glk_register_l3_1_counter_query(perf);
   glk_register_rasterizer_and_pixel_backend_counter_query(perf);
   glk_register_sampler_counter_query(perf);
   glk_register_tdl_1_counter_query(perf);
   glk_register_tdl_2_counter_query(perf);
   glk_register_compute_extra_counter_query(perf);
   glk_register_test_oa_counter_query(perf);
   glk_register_pma__stall_counter_query(perf);
}


static void
icl_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "e3cd52cf-c6b0-4019-b369-3bc9c75a0cbc";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142C0014 },
         { .reg = 0x00009888, .val = 0x14120700 },
         { .reg = 0x00009888, .val = 0x121500E0 },
         { .reg = 0x00009888, .val = 0x1C1E000C },
         { .reg = 0x00009888, .val = 0x0E1F000C },
         { .reg = 0x00009888, .val = 0x1C200014 },
         { .reg = 0x00009888, .val = 0x16212800 },
         { .reg = 0x00009888, .val = 0x0E2C2041 },
         { .reg = 0x00009888, .val = 0x102C0000 },
         { .reg = 0x00009888, .val = 0x1A2C0000 },
         { .reg = 0x00009888, .val = 0x10040140 },
         { .reg = 0x00009888, .val = 0x0E040005 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x06054000 },
         { .reg = 0x00009888, .val = 0x08051000 },
         { .reg = 0x00009888, .val = 0x260600C0 },
         { .reg = 0x00009888, .val = 0x24061800 },
         { .reg = 0x00009888, .val = 0x04120023 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x08120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04144000 },
         { .reg = 0x00009888, .val = 0x02150980 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x1815000F },
         { .reg = 0x00009888, .val = 0x06150000 },
         { .reg = 0x00009888, .val = 0x08154000 },
         { .reg = 0x00009888, .val = 0x0E18A000 },
         { .reg = 0x00009888, .val = 0x14190028 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1A1C01C0 },
         { .reg = 0x00009888, .val = 0x1C1C000A },
         { .reg = 0x00009888, .val = 0x1A5C01C0 },
         { .reg = 0x00009888, .val = 0x1C5C000A },
         { .reg = 0x00009888, .val = 0x001C0097 },
         { .reg = 0x00009888, .val = 0x061C9400 },
         { .reg = 0x00009888, .val = 0x0A1C00A7 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x081D8100 },
         { .reg = 0x00009888, .val = 0x0A1D0004 },
         { .reg = 0x00009888, .val = 0x085C9497 },
         { .reg = 0x00009888, .val = 0x0A5CA700 },
         { .reg = 0x00009888, .val = 0x105C0000 },
         { .reg = 0x00009888, .val = 0x0A5D000B },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1E1EEF80 },
         { .reg = 0x00009888, .val = 0x301F4000 },
         { .reg = 0x00009888, .val = 0x501F2404 },
         { .reg = 0x00009888, .val = 0x521F0092 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x4E1F2000 },
         { .reg = 0x00009888, .val = 0x3E1F0300 },
         { .reg = 0x00009888, .val = 0x461F0303 },
         { .reg = 0x00009888, .val = 0x481F0303 },
         { .reg = 0x00009888, .val = 0x4A1F0003 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x421F4001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00051050 },
         { .reg = 0x0000E45C, .val = 0x00000052 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 9) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler00 Bottleneck";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      if (perf->sys_vars.subslice_mask & 9) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "04546170-d541-4804-96f8-007a454a3f2c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x12230012 },
         { .reg = 0x00009888, .val = 0x10230019 },
         { .reg = 0x00009888, .val = 0x12A30012 },
         { .reg = 0x00009888, .val = 0x10A30019 },
         { .reg = 0x00009888, .val = 0x1A200400 },
         { .reg = 0x00009888, .val = 0x1C200020 },
         { .reg = 0x00009888, .val = 0x04208000 },
         { .reg = 0x00009888, .val = 0x08208000 },
         { .reg = 0x00009888, .val = 0x0C208000 },
         { .reg = 0x00009888, .val = 0x0A230031 },
         { .reg = 0x00009888, .val = 0x0E231E00 },
         { .reg = 0x00009888, .val = 0x0223003D },
         { .reg = 0x00009888, .val = 0x04230032 },
         { .reg = 0x00009888, .val = 0x06230033 },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x1AA01000 },
         { .reg = 0x00009888, .val = 0x1CA00008 },
         { .reg = 0x00009888, .val = 0x02A08000 },
         { .reg = 0x00009888, .val = 0x06A08000 },
         { .reg = 0x00009888, .val = 0x0AA08000 },
         { .reg = 0x00009888, .val = 0x0AA31880 },
         { .reg = 0x00009888, .val = 0x0EA3003C },
         { .reg = 0x00009888, .val = 0x00A31E80 },
         { .reg = 0x00009888, .val = 0x02A31900 },
         { .reg = 0x00009888, .val = 0x04A31980 },
         { .reg = 0x00009888, .val = 0x0E044055 },
         { .reg = 0x00009888, .val = 0x10040141 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600CC },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x18123000 },
         { .reg = 0x00009888, .val = 0x1A12000C },
         { .reg = 0x00009888, .val = 0x0212C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A12C000 },
         { .reg = 0x00009888, .val = 0x08138000 },
         { .reg = 0x00009888, .val = 0x0A134000 },
         { .reg = 0x00009888, .val = 0x0413A000 },
         { .reg = 0x00009888, .val = 0x06132000 },
         { .reg = 0x00009888, .val = 0x0A148000 },
         { .reg = 0x00009888, .val = 0x0E144000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x04148000 },
         { .reg = 0x00009888, .val = 0x16150380 },
         { .reg = 0x00009888, .val = 0x1815000D },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x0415C000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A154000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x0A182000 },
         { .reg = 0x00009888, .val = 0x0E188000 },
         { .reg = 0x00009888, .val = 0x02182000 },
         { .reg = 0x00009888, .val = 0x04182000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x12190400 },
         { .reg = 0x00009888, .val = 0x14190020 },
         { .reg = 0x00009888, .val = 0x0A192000 },
         { .reg = 0x00009888, .val = 0x0C192000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1C8000 },
         { .reg = 0x00009888, .val = 0x0E1C2000 },
         { .reg = 0x00009888, .val = 0x001C8000 },
         { .reg = 0x00009888, .val = 0x021C8000 },
         { .reg = 0x00009888, .val = 0x041C8000 },
         { .reg = 0x00009888, .val = 0x121D1000 },
         { .reg = 0x00009888, .val = 0x141D0008 },
         { .reg = 0x00009888, .val = 0x081D8000 },
         { .reg = 0x00009888, .val = 0x0A1D8000 },
         { .reg = 0x00009888, .val = 0x0C1D8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1A1C01C0 },
         { .reg = 0x00009888, .val = 0x1C1C000A },
         { .reg = 0x00009888, .val = 0x1A5C01C0 },
         { .reg = 0x00009888, .val = 0x1C5C000A },
         { .reg = 0x00009888, .val = 0x001C0097 },
         { .reg = 0x00009888, .val = 0x061C9400 },
         { .reg = 0x00009888, .val = 0x0C1C00A7 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x081D8100 },
         { .reg = 0x00009888, .val = 0x0A1D0010 },
         { .reg = 0x00009888, .val = 0x085C9497 },
         { .reg = 0x00009888, .val = 0x0C5CA700 },
         { .reg = 0x00009888, .val = 0x105C0000 },
         { .reg = 0x00009888, .val = 0x0A5D0023 },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1E1E0F80 },
         { .reg = 0x00009888, .val = 0x201E000E },
         { .reg = 0x00009888, .val = 0x301F4000 },
         { .reg = 0x00009888, .val = 0x501F2524 },
         { .reg = 0x00009888, .val = 0x521F2522 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2430 },
         { .reg = 0x00009888, .val = 0x3E1F0300 },
         { .reg = 0x00009888, .val = 0x461F0303 },
         { .reg = 0x00009888, .val = 0x481F3003 },
         { .reg = 0x00009888, .val = 0x4A1F0300 },
         { .reg = 0x00009888, .val = 0x4C1F0003 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F3000 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F3000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__typed_atomics__read;
      counter->name = "Typed Atomics Accesses";
      counter->desc = "The total number of typed atomic accesses via Data Port. Unit: events.";
      counter->symbol_name = "TypedAtomics";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Extended metrics set";
   query->symbol_name = "ComputeExtended";
   query->guid = "43eb7fc1-dc5e-45e2-a90a-0053f5397271";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1222000B },
         { .reg = 0x00009888, .val = 0x16220009 },
         { .reg = 0x00009888, .val = 0x12230019 },
         { .reg = 0x00009888, .val = 0x10230012 },
         { .reg = 0x00009888, .val = 0x101E8000 },
         { .reg = 0x00009888, .val = 0x1A1EF800 },
         { .reg = 0x00009888, .val = 0x1C1E0007 },
         { .reg = 0x00009888, .val = 0x001F8000 },
         { .reg = 0x00009888, .val = 0x0C1FF800 },
         { .reg = 0x00009888, .val = 0x0E1F0007 },
         { .reg = 0x00009888, .val = 0x00204000 },
         { .reg = 0x00009888, .val = 0x0E204000 },
         { .reg = 0x00009888, .val = 0x1A202AA0 },
         { .reg = 0x00009888, .val = 0x1C200005 },
         { .reg = 0x00009888, .val = 0x04208000 },
         { .reg = 0x00009888, .val = 0x06208000 },
         { .reg = 0x00009888, .val = 0x08208000 },
         { .reg = 0x00009888, .val = 0x0A208000 },
         { .reg = 0x00009888, .val = 0x0C208000 },
         { .reg = 0x00009888, .val = 0x00214000 },
         { .reg = 0x00009888, .val = 0x0E214000 },
         { .reg = 0x00009888, .val = 0x14214000 },
         { .reg = 0x00009888, .val = 0x16210555 },
         { .reg = 0x00009888, .val = 0x00220011 },
         { .reg = 0x00009888, .val = 0x06220900 },
         { .reg = 0x00009888, .val = 0x08220A13 },
         { .reg = 0x00009888, .val = 0x0A220B15 },
         { .reg = 0x00009888, .val = 0x0C222317 },
         { .reg = 0x00009888, .val = 0x0E220043 },
         { .reg = 0x00009888, .val = 0x02231AB4 },
         { .reg = 0x00009888, .val = 0x04231DBA },
         { .reg = 0x00009888, .val = 0x06230039 },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x0C044400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040055 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050015 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FD00 },
         { .reg = 0x00009888, .val = 0x2606007F },
         { .reg = 0x00009888, .val = 0x0015C000 },
         { .reg = 0x00009888, .val = 0x0E15C000 },
         { .reg = 0x00009888, .val = 0x16157FF8 },
         { .reg = 0x00009888, .val = 0x18150003 },
         { .reg = 0x00009888, .val = 0x0415C000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x00182000 },
         { .reg = 0x00009888, .val = 0x0618A000 },
         { .reg = 0x00009888, .val = 0x0818A000 },
         { .reg = 0x00009888, .val = 0x0A18A000 },
         { .reg = 0x00009888, .val = 0x0C18A000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x0218A000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x08192000 },
         { .reg = 0x00009888, .val = 0x0E19A000 },
         { .reg = 0x00009888, .val = 0x12195540 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x0A19A000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x16136860 },
         { .reg = 0x00009888, .val = 0x24000004 },
         { .reg = 0x00009888, .val = 0x20000040 },
         { .reg = 0x00009888, .val = 0x0E132980 },
         { .reg = 0x00009888, .val = 0x00132D80 },
         { .reg = 0x00009888, .val = 0x10130000 },
         { .reg = 0x00009888, .val = 0x1A130000 },
         { .reg = 0x00009888, .val = 0x02130000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F000C },
         { .reg = 0x00009888, .val = 0x3A1F2000 },
         { .reg = 0x00009888, .val = 0x4E1F2413 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F3013 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F3030 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x30000036 },
         { .reg = 0x00002774, .val = 0x01FFFE00 },
         { .reg = 0x00002778, .val = 0x31000034 },
         { .reg = 0x0000277C, .val = 0x01FFFE00 },
         { .reg = 0x00002780, .val = 0x00000C9A },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x00000C92 },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x00000CA2 },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x00000E42 },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x00000E6A },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x00000C32 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
         { .reg = 0x0000274C, .val = 0x87643210 },
         { .reg = 0x00002744, .val = 0x00001811 },
         { .reg = 0x00002748, .val = 0x87654310 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_extended__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__typed_atomics00__read;
         counter->name = "Typed Atomics 00";
         counter->desc = "Slice 0 Dualsubslice 0 typed atomics. Unit: messages.";
         counter->symbol_name = "TypedAtomics00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 40;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__typed_reads00__read;
         counter->name = "Typed Reads 00";
         counter->desc = "Slice 0 Dualsubslice 0 typed reads. Unit: messages.";
         counter->symbol_name = "TypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 48;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__typed_writes00__read;
         counter->name = "Typed Writes 00";
         counter->desc = "Slice 0 Dualsubslice 0 typed writes. Unit: messages.";
         counter->symbol_name = "TypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 56;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__untyped_reads00__read;
         counter->name = "Untyped Reads 00";
         counter->desc = "Slice 0 Dualsubslice 0 untyped reads (including SLM reads). Unit: messages.";
         counter->symbol_name = "UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 64;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__untyped_writes00__read;
         counter->name = "Untyped Writes 00";
         counter->desc = "Slice 0 Dualsubslice 0 untyped writes (including SLM writes). Unit: messages.";
         counter->symbol_name = "UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 72;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_typed_reads00__read;
         counter->name = "Eu Typed Reads 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu Typed Reads Unit: messages.";
         counter->symbol_name = "EuTypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 80;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_typed_writes00__read;
         counter->name = "Eu Typed Writes 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu Typed Writes Unit: messages.";
         counter->symbol_name = "EuTypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_typed_atomics00__read;
         counter->name = "Eu Typed Atomics 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu Typed Atomics Unit: messages.";
         counter->symbol_name = "EuTypedAtomics00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_a32_untyped_reads00__read;
         counter->name = "Eu A32 Untyped Reads 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu A32 Untyped Reads Unit: messages.";
         counter->symbol_name = "EuA32UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_a32_untyped_writes00__read;
         counter->name = "Eu A32 Untyped Writes 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu A32 Untyped Writes Unit: messages.";
         counter->symbol_name = "EuA32UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__compute_extended__eu_a64_untyped_reads00__read;
         counter->name = "Eu 64 Untyped Reads 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu 64 Untyped Reads Unit: messages.";
         counter->symbol_name = "EuA64UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_extended__eu_a64_untyped_writes00__read;
         counter->name = "Eu A64 Untyped Writes 00";
         counter->desc = "Slice0 Dualsubslice 0 Eu A64 Untyped Writes Unit: messages.";
         counter->symbol_name = "EuA64UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__compute_extended__typed_atomics_per_cache_line__read;
         counter->name = "TypedAtomicsPerCacheLine";
         counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
         counter->symbol_name = "TypedAtomicsPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__compute_extended__typed_reads_per_cache_line__read;
         counter->name = "TypedReadsPerCacheLine";
         counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
         counter->symbol_name = "TypedReadsPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__compute_extended__typed_writes_per_cache_line__read;
         counter->name = "TypedWritesPerCacheLine";
         counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
         counter->symbol_name = "TypedWritesPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 144;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 148;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__compute_extended__untyped_writes_per_cache_line__read;
         counter->name = "UntypedWritesPerCacheLine";
         counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
         counter->symbol_name = "UntypedWritesPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 152;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache metrics set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "05d5e01f-0800-4975-b36b-7b169cad3fab";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 55);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14120700 },
         { .reg = 0x00009888, .val = 0x121500E0 },
         { .reg = 0x00009888, .val = 0x10040154 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040055 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600F0 },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x0C120023 },
         { .reg = 0x00009888, .val = 0x0E12152B },
         { .reg = 0x00009888, .val = 0x00121480 },
         { .reg = 0x00009888, .val = 0x02120028 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x18120000 },
         { .reg = 0x00009888, .val = 0x1A120000 },
         { .reg = 0x00009888, .val = 0x04120000 },
         { .reg = 0x00009888, .val = 0x0A13D000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0C144000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x0C150980 },
         { .reg = 0x00009888, .val = 0x02154D80 },
         { .reg = 0x00009888, .val = 0x04154C9A },
         { .reg = 0x00009888, .val = 0x06150018 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x16150800 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x08150000 },
         { .reg = 0x00009888, .val = 0x0A150000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1217241C },
         { .reg = 0x00009888, .val = 0x22000050 },
         { .reg = 0x00009888, .val = 0x18133C00 },
         { .reg = 0x00009888, .val = 0x081710B0 },
         { .reg = 0x00009888, .val = 0x10170000 },
         { .reg = 0x00009888, .val = 0x501F1124 },
         { .reg = 0x00009888, .val = 0x521F4801 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x461F3100 },
         { .reg = 0x00009888, .val = 0x481F0031 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0030 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F3030 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "Slice0 L3 Bank0 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank0. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 264;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "Slice0 L3 Bank1 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank1. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 272;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "Slice0 L3 Bank2 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank2. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 280;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "Slice0 L3 Bank3 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank3. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 288;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank04_accesses__read;
         counter->name = "Slice0 L3 Bank4 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank4. Unit: messages.";
         counter->symbol_name = "L3Bank04Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 296;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank05_accesses__read;
         counter->name = "Slice0 L3 Bank5 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank5. Unit: messages.";
         counter->symbol_name = "L3Bank05Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank06_accesses__read;
         counter->name = "Slice0 L3 Bank6 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank6. Unit: messages.";
         counter->symbol_name = "L3Bank06Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_bank07_accesses__read;
         counter->name = "Slice0 L3 Bank7 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank7. Unit: messages.";
         counter->symbol_name = "L3Bank07Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 376;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "03c7a167-2abc-4ba6-878a-f1d80082abca";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only
    * once. The query above is freshly allocated with rzalloc(), so
    * data_size is presumably still zero when the check below runs. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x001F001E },
         { .reg = 0x00009888, .val = 0x10160000 },
         { .reg = 0x00009888, .val = 0x1017001F },
         { .reg = 0x00009888, .val = 0x0A1F0005 },
         { .reg = 0x00009888, .val = 0x0E1F0000 },
         { .reg = 0x00009888, .val = 0x0C1F0000 },
         { .reg = 0x00009888, .val = 0x1A200800 },
         { .reg = 0x00009888, .val = 0x10040015 },
         { .reg = 0x00009888, .val = 0x0E054000 },
         { .reg = 0x00009888, .val = 0x14050005 },
         { .reg = 0x00009888, .val = 0x26060038 },
         { .reg = 0x00009888, .val = 0x16157E00 },
         { .reg = 0x00009888, .val = 0x0C160022 },
         { .reg = 0x00009888, .val = 0x08160000 },
         { .reg = 0x00009888, .val = 0x0C170540 },
         { .reg = 0x00009888, .val = 0x04170000 },
         { .reg = 0x00009888, .val = 0x0A188000 },
         { .reg = 0x00009888, .val = 0x0C181000 },
         { .reg = 0x00009888, .val = 0x12195000 },
         { .reg = 0x00009888, .val = 0x14190001 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x12010000 },
         { .reg = 0x00009888, .val = 0x100B7C00 },
         { .reg = 0x00009888, .val = 0x100F0019 },
         { .reg = 0x00009888, .val = 0x10107C00 },
         { .reg = 0x00009888, .val = 0x16100000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x120703C0 },
         { .reg = 0x00009888, .val = 0x10090000 },
         { .reg = 0x00009888, .val = 0x06010080 },
         { .reg = 0x00009888, .val = 0x0E014180 },
         { .reg = 0x00009888, .val = 0x10010000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x060B0015 },
         { .reg = 0x00009888, .val = 0x080B8000 },
         { .reg = 0x00009888, .val = 0x0A0B2000 },
         { .reg = 0x00009888, .val = 0x020BA000 },
         { .reg = 0x00009888, .val = 0x040BA000 },
         { .reg = 0x00009888, .val = 0x0E0E0A02 },
         { .reg = 0x00009888, .val = 0x0C0EA800 },
         { .reg = 0x00009888, .val = 0x020F0002 },
         { .reg = 0x00009888, .val = 0x180F0800 },
         { .reg = 0x00009888, .val = 0x040F0000 },
         { .reg = 0x00009888, .val = 0x060F8000 },
         { .reg = 0x00009888, .val = 0x0A100017 },
         { .reg = 0x00009888, .val = 0x04107113 },
         { .reg = 0x00009888, .val = 0x18100020 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x08100000 },
         { .reg = 0x00009888, .val = 0x08110C40 },
         { .reg = 0x00009888, .val = 0x02110C80 },
         { .reg = 0x00009888, .val = 0x00024000 },
         { .reg = 0x00009888, .val = 0x06028000 },
         { .reg = 0x00009888, .val = 0x0E02C000 },
         { .reg = 0x00009888, .val = 0x00039000 },
         { .reg = 0x00009888, .val = 0x06036000 },
         { .reg = 0x00009888, .val = 0x0803A000 },
         { .reg = 0x00009888, .val = 0x0A032000 },
         { .reg = 0x00009888, .val = 0x0E035000 },
         { .reg = 0x00009888, .val = 0x0203A000 },
         { .reg = 0x00009888, .val = 0x0403A000 },
         { .reg = 0x00009888, .val = 0x00044000 },
         { .reg = 0x00009888, .val = 0x0E044000 },
         { .reg = 0x00009888, .val = 0x10068000 },
         { .reg = 0x00009888, .val = 0x12068000 },
         { .reg = 0x00009888, .val = 0x14068000 },
         { .reg = 0x00009888, .val = 0x02068000 },
         { .reg = 0x00009888, .val = 0x04068000 },
         { .reg = 0x00009888, .val = 0x06068000 },
         { .reg = 0x00009888, .val = 0x08068000 },
         { .reg = 0x00009888, .val = 0x0A068000 },
         { .reg = 0x00009888, .val = 0x0C068000 },
         { .reg = 0x00009888, .val = 0x00070032 },
         { .reg = 0x00009888, .val = 0x0E070033 },
         { .reg = 0x00009888, .val = 0x04070000 },
         { .reg = 0x00009888, .val = 0x08098011 },
         { .reg = 0x00009888, .val = 0x00090980 },
         { .reg = 0x00009888, .val = 0x18090A00 },
         { .reg = 0x00009888, .val = 0x02090000 },
         { .reg = 0x00009888, .val = 0x04098000 },
         { .reg = 0x00009888, .val = 0x06098000 },
         { .reg = 0x00009888, .val = 0x0A098000 },
         { .reg = 0x00009888, .val = 0x0C098000 },
         { .reg = 0x00009888, .val = 0x301F6000 },
         { .reg = 0x00009888, .val = 0x501F36DB },
         { .reg = 0x00009888, .val = 0x521F491B },
         { .reg = 0x00009888, .val = 0x541F001B },
         { .reg = 0x00009888, .val = 0x3A1F6000 },
         { .reg = 0x00009888, .val = 0x4E1F1B41 },
         { .reg = 0x00009888, .val = 0x3E1F2100 },
         { .reg = 0x00009888, .val = 0x461F4141 },
         { .reg = 0x00009888, .val = 0x481F1160 },
         { .reg = 0x00009888, .val = 0x4A1F1000 },
         { .reg = 0x00009888, .val = 0x4C1F2120 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F0141 },
         { .reg = 0x00009888, .val = 0x421F1160 },
         { .reg = 0x00009888, .val = 0x441F3111 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "c5cbc488-6569-41dd-9128-42bf6f0d317c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

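   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query once and
    * only fill it in while its data_size is still unset.
    *
    * NOTE(review): the query is allocated with rzalloc() above rather than
    * kept in a global variable, so the data_size check below presumably
    * always passes -- confirm against gen_perf.py. */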

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1223000A },
         { .reg = 0x00009888, .val = 0x10230009 },
         { .reg = 0x00009888, .val = 0x1263000A },
         { .reg = 0x00009888, .val = 0x10630009 },
         { .reg = 0x00009888, .val = 0x12A3000A },
         { .reg = 0x00009888, .val = 0x10A30009 },
         { .reg = 0x00009888, .val = 0x12E3000A },
         { .reg = 0x00009888, .val = 0x10E30009 },
         { .reg = 0x00009888, .val = 0x10150019 },
         { .reg = 0x00009888, .val = 0x1C20000A },
         { .reg = 0x00009888, .val = 0x0C231900 },
         { .reg = 0x00009888, .val = 0x0E23003C },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x1C600020 },
         { .reg = 0x00009888, .val = 0x02608000 },
         { .reg = 0x00009888, .val = 0x0E631900 },
         { .reg = 0x00009888, .val = 0x00631E00 },
         { .reg = 0x00009888, .val = 0x04A08000 },
         { .reg = 0x00009888, .val = 0x06A08000 },
         { .reg = 0x00009888, .val = 0x02A31E32 },
         { .reg = 0x00009888, .val = 0x00A30000 },
         { .reg = 0x00009888, .val = 0x08E08000 },
         { .reg = 0x00009888, .val = 0x0AE08000 },
         { .reg = 0x00009888, .val = 0x04E31E32 },
         { .reg = 0x00009888, .val = 0x00E30000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x10040150 },
         { .reg = 0x00009888, .val = 0x0E040015 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x14050054 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x24063F00 },
         { .reg = 0x00009888, .val = 0x260600E0 },
         { .reg = 0x00009888, .val = 0x0E088000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x14090040 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x1A120020 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x0E148000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x00150047 },
         { .reg = 0x00009888, .val = 0x16156000 },
         { .reg = 0x00009888, .val = 0x18150007 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C188000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x021CA000 },
         { .reg = 0x00009888, .val = 0x0A1DA000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3061 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x521F4000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x501F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0100 },
         { .reg = 0x00009888, .val = 0x4C1F3030 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F3330 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FFFE },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
      counter->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
      counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
      counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
      counter->category = "GPU/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
      counter->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
      counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
      counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
      counter->category = "GPU/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
      counter->name = "Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3";
      counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2) Unit: percent.";
      counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
      counter->category = "GPU/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read;
      counter->name = "Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3";
      counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3) Unit: percent.";
      counter->symbol_name = "NonSamplerShader03AccessStalledOnL3";
      counter->category = "GPU/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__hdc_and_sf__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "0316ce4f-e03f-4738-8262-13528fce8eea";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x140A001F },
         { .reg = 0x00009888, .val = 0x040C5000 },
         { .reg = 0x00009888, .val = 0x140F001F },
         { .reg = 0x00009888, .val = 0x04115017 },
         { .reg = 0x00009888, .val = 0x10136000 },
         { .reg = 0x00009888, .val = 0x1213001C },
         { .reg = 0x00009888, .val = 0x16140000 },
         { .reg = 0x00009888, .val = 0x10160018 },
         { .reg = 0x00009888, .val = 0x101A0018 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045545 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E051000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08051000 },
         { .reg = 0x00009888, .val = 0x2406DF00 },
         { .reg = 0x00009888, .val = 0x26060007 },
         { .reg = 0x00009888, .val = 0x00084000 },
         { .reg = 0x00009888, .val = 0x02084000 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06094000 },
         { .reg = 0x00009888, .val = 0x000A3080 },
         { .reg = 0x00009888, .val = 0x160A0000 },
         { .reg = 0x00009888, .val = 0x080A0000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x020C05C0 },
         { .reg = 0x00009888, .val = 0x080C0000 },
         { .reg = 0x00009888, .val = 0x020D1000 },
         { .reg = 0x00009888, .val = 0x040D1000 },
         { .reg = 0x00009888, .val = 0x060E1000 },
         { .reg = 0x00009888, .val = 0x080E1000 },
         { .reg = 0x00009888, .val = 0x020F0061 },
         { .reg = 0x00009888, .val = 0x160F0000 },
         { .reg = 0x00009888, .val = 0x0A0F0000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x08110000 },
         { .reg = 0x00009888, .val = 0x18120C00 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x06128000 },
         { .reg = 0x00009888, .val = 0x0613092F },
         { .reg = 0x00009888, .val = 0x08134025 },
         { .reg = 0x00009888, .val = 0x0C130000 },
         { .reg = 0x00009888, .val = 0x0413A000 },
         { .reg = 0x00009888, .val = 0x00148066 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x0614C000 },
         { .reg = 0x00009888, .val = 0x08144000 },
         { .reg = 0x00009888, .val = 0x0A144000 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x161500E8 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04158000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0C154000 },
         { .reg = 0x00009888, .val = 0x08161000 },
         { .reg = 0x00009888, .val = 0x08184000 },
         { .reg = 0x00009888, .val = 0x12190100 },
         { .reg = 0x00009888, .val = 0x0A1A0020 },
         { .reg = 0x00009888, .val = 0x081A0000 },
         { .reg = 0x00009888, .val = 0x0A1C1000 },
         { .reg = 0x00009888, .val = 0x121D0400 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3000 },
         { .reg = 0x00009888, .val = 0x1A1F0061 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4904 },
         { .reg = 0x00009888, .val = 0x521F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F6000 },
         { .reg = 0x00009888, .val = 0x461F5050 },
         { .reg = 0x00009888, .val = 0x481F1040 },
         { .reg = 0x00009888, .val = 0x401F4010 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F5033 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000038 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFF7 },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFEF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__pixel_data00_ready__read;
      counter->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData00Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__pixel_data01_ready__read;
      counter->name = "Slice0 Pipe1 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData01Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_output00_available__read;
      counter->name = "Slice0 Pipe0 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput00Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__ps_output01_available__read;
      counter->name = "Slice0 Pipe1 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe1 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput01Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__pixel_values00_ready__read;
      counter->name = "Slice0 Pipe0 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues00Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__pixel_values01_ready__read;
      counter->name = "Slice0 Pipe1 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe1 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues01Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__rasterizer_and_pixel_backend__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "21d69ec3-91e1-48a8-acd6-c0c4ec6e819a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x17340000 },
         { .reg = 0x00009888, .val = 0x17740000 },
         { .reg = 0x00009888, .val = 0x17B40000 },
         { .reg = 0x00009888, .val = 0x17F40000 },
         { .reg = 0x00009888, .val = 0x16340000 },
         { .reg = 0x00009888, .val = 0x16740000 },
         { .reg = 0x00009888, .val = 0x16B40000 },
         { .reg = 0x00009888, .val = 0x16F40000 },
         { .reg = 0x00009888, .val = 0x07340037 },
         { .reg = 0x00009888, .val = 0x21340000 },
         { .reg = 0x00009888, .val = 0x09740037 },
         { .reg = 0x00009888, .val = 0x21740000 },
         { .reg = 0x00009888, .val = 0x0BB40037 },
         { .reg = 0x00009888, .val = 0x21B40000 },
         { .reg = 0x00009888, .val = 0x0DF40037 },
         { .reg = 0x00009888, .val = 0x21F40000 },
         { .reg = 0x00009888, .val = 0x0E0000A7 },
         { .reg = 0x00009888, .val = 0x08012000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x0C038000 },
         { .reg = 0x00009888, .val = 0x1C340037 },
         { .reg = 0x00009888, .val = 0x20340000 },
         { .reg = 0x00009888, .val = 0x1E740037 },
         { .reg = 0x00009888, .val = 0x20740000 },
         { .reg = 0x00009888, .val = 0x02B40037 },
         { .reg = 0x00009888, .val = 0x20B40000 },
         { .reg = 0x00009888, .val = 0x04F40037 },
         { .reg = 0x00009888, .val = 0x20F40000 },
         { .reg = 0x00009888, .val = 0x10040200 },
         { .reg = 0x00009888, .val = 0x0E040055 },
         { .reg = 0x00009888, .val = 0x14050070 },
         { .reg = 0x00009888, .val = 0x04058000 },
         { .reg = 0x00009888, .val = 0x06056000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600C0 },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x02074000 },
         { .reg = 0x00009888, .val = 0x04078000 },
         { .reg = 0x00009888, .val = 0x06124000 },
         { .reg = 0x00009888, .val = 0x08124000 },
         { .reg = 0x00009888, .val = 0x0A124000 },
         { .reg = 0x00009888, .val = 0x0C124000 },
         { .reg = 0x00009888, .val = 0x04138000 },
         { .reg = 0x00009888, .val = 0x06137000 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x0414C000 },
         { .reg = 0x00009888, .val = 0x06144000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08154000 },
         { .reg = 0x00009888, .val = 0x0A154000 },
         { .reg = 0x00009888, .val = 0x0C154000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3000 },
         { .reg = 0x00009888, .val = 0x2A1F0061 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x521F0000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2460 },
         { .reg = 0x00009888, .val = 0x501F0124 },
         { .reg = 0x00009888, .val = 0x4C1F0133 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F6060 },
         { .reg = 0x00009888, .val = 0x421F0010 },
         { .reg = 0x00009888, .val = 0x441F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank0_active__read;
         counter->name = "Slice0 L3 Bank0 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is active Unit: percent.";
         counter->symbol_name = "L30Bank0Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank1_active__read;
         counter->name = "Slice0 L3 Bank1 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is active Unit: percent.";
         counter->symbol_name = "L30Bank1Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank2_active__read;
         counter->name = "Slice0 L3 Bank2 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank2 is active Unit: percent.";
         counter->symbol_name = "L30Bank2Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank3_active__read;
         counter->name = "Slice0 L3 Bank3 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank3 is active Unit: percent.";
         counter->symbol_name = "L30Bank3Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank4_active__read;
         counter->name = "Slice0 L3 Bank4 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank4 is active Unit: percent.";
         counter->symbol_name = "L30Bank4Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank5_active__read;
         counter->name = "Slice0 L3 Bank5 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank5 is active Unit: percent.";
         counter->symbol_name = "L30Bank5Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank6_active__read;
         counter->name = "Slice0 L3 Bank6 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank6 is active Unit: percent.";
         counter->symbol_name = "L30Bank6Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_1__l30_bank7_active__read;
         counter->name = "Slice0 L3 Bank7 Active";
         counter->desc = "The percentage of time in which slice0 L3 bank7 is active Unit: percent.";
         counter->symbol_name = "L30Bank7Active";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_1__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank0 stalled metric set";
   query->symbol_name = "L3_2";
   query->guid = "e60e9155-6830-4aec-baf2-1c3c15a73869";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10050C00 },
         { .reg = 0x00009888, .val = 0x12050002 },
         { .reg = 0x00009888, .val = 0x00050025 },
         { .reg = 0x00009888, .val = 0x06050900 },
         { .reg = 0x00009888, .val = 0x080508EA },
         { .reg = 0x00009888, .val = 0x0A0508AB },
         { .reg = 0x00009888, .val = 0x0C050A21 },
         { .reg = 0x00009888, .val = 0x0E050A60 },
         { .reg = 0x00009888, .val = 0x04050000 },
         { .reg = 0x00009888, .val = 0x14050000 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_2__l30_bank0_stalled__read;
         counter->name = "Slice0 L3 Bank0 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank0Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank1 stalled metric set";
   query->symbol_name = "L3_3";
   query->guid = "47c364d5-1799-4d17-9447-add9358c6451";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10050400 },
         { .reg = 0x00009888, .val = 0x12050000 },
         { .reg = 0x00009888, .val = 0x00050025 },
         { .reg = 0x00009888, .val = 0x06050900 },
         { .reg = 0x00009888, .val = 0x080508EA },
         { .reg = 0x00009888, .val = 0x0A0508AB },
         { .reg = 0x00009888, .val = 0x0C050A21 },
         { .reg = 0x00009888, .val = 0x0E050A60 },
         { .reg = 0x00009888, .val = 0x04050000 },
         { .reg = 0x00009888, .val = 0x14050000 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_3__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank4 stalled metric set";
   query->symbol_name = "L3_4";
   query->guid = "e5ab5c08-3130-4469-8eaf-b23d3dc817d4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10010C00 },
         { .reg = 0x00009888, .val = 0x12010002 },
         { .reg = 0x00009888, .val = 0x0C000400 },
         { .reg = 0x00009888, .val = 0x0E005500 },
         { .reg = 0x00009888, .val = 0x10000155 },
         { .reg = 0x00009888, .val = 0x00010025 },
         { .reg = 0x00009888, .val = 0x06010900 },
         { .reg = 0x00009888, .val = 0x080108EA },
         { .reg = 0x00009888, .val = 0x0A0108AB },
         { .reg = 0x00009888, .val = 0x0C010A21 },
         { .reg = 0x00009888, .val = 0x0E010A60 },
         { .reg = 0x00009888, .val = 0x04010000 },
         { .reg = 0x00009888, .val = 0x14010000 },
         { .reg = 0x00009888, .val = 0x0C040400 },
         { .reg = 0x00009888, .val = 0x0E045500 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A054000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x18125540 },
         { .reg = 0x00009888, .val = 0x1A120015 },
         { .reg = 0x00009888, .val = 0x04131000 },
         { .reg = 0x00009888, .val = 0x06138000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x00144000 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x0814C000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x16152AA8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_4__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_4__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_4__l30_bank4_stalled__read;
         counter->name = "Slice0 L3 Bank4 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank4 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank4Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank5 stalled metric set";
   query->symbol_name = "L3_5";
   query->guid = "6cdf23c1-f725-414c-959a-c90fa5571b1f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10010400 },
         { .reg = 0x00009888, .val = 0x12010000 },
         { .reg = 0x00009888, .val = 0x0C000400 },
         { .reg = 0x00009888, .val = 0x0E005500 },
         { .reg = 0x00009888, .val = 0x10000155 },
         { .reg = 0x00009888, .val = 0x00010022 },
         { .reg = 0x00009888, .val = 0x06010840 },
         { .reg = 0x00009888, .val = 0x08010828 },
         { .reg = 0x00009888, .val = 0x0A010969 },
         { .reg = 0x00009888, .val = 0x0C010AA4 },
         { .reg = 0x00009888, .val = 0x0E010AE3 },
         { .reg = 0x00009888, .val = 0x04010000 },
         { .reg = 0x00009888, .val = 0x14010000 },
         { .reg = 0x00009888, .val = 0x0C040400 },
         { .reg = 0x00009888, .val = 0x0E045500 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A054000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x18125540 },
         { .reg = 0x00009888, .val = 0x1A120015 },
         { .reg = 0x00009888, .val = 0x04131000 },
         { .reg = 0x00009888, .val = 0x06138000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x00144000 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x0814C000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x16152AA8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__l3_5__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__l3_5__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__l3_5__l30_bank5_stalled__read;
         counter->name = "Slice0 L3 Bank5 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank5 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank5Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler 1";
   query->symbol_name = "Sampler_1";
   query->guid = "51a2eb6d-9fad-4489-8f22-ab845fe7882a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142A0165 },
         { .reg = 0x00009888, .val = 0x142F0165 },
         { .reg = 0x00009888, .val = 0x146A0165 },
         { .reg = 0x00009888, .val = 0x146F0165 },
         { .reg = 0x00009888, .val = 0x14AA0165 },
         { .reg = 0x00009888, .val = 0x14AF0165 },
         { .reg = 0x00009888, .val = 0x14EA0165 },
         { .reg = 0x00009888, .val = 0x14EF0165 },
         { .reg = 0x00009888, .val = 0x161E8000 },
         { .reg = 0x00009888, .val = 0x181EC000 },
         { .reg = 0x00009888, .val = 0x1A1E0400 },
         { .reg = 0x00009888, .val = 0x061F8000 },
         { .reg = 0x00009888, .val = 0x081F8000 },
         { .reg = 0x00009888, .val = 0x0A1F8000 },
         { .reg = 0x00009888, .val = 0x0C1F0400 },
         { .reg = 0x00009888, .val = 0x06204000 },
         { .reg = 0x00009888, .val = 0x08204000 },
         { .reg = 0x00009888, .val = 0x0A204000 },
         { .reg = 0x00009888, .val = 0x0C204000 },
         { .reg = 0x00009888, .val = 0x06218000 },
         { .reg = 0x00009888, .val = 0x08218000 },
         { .reg = 0x00009888, .val = 0x0A218000 },
         { .reg = 0x00009888, .val = 0x0C218000 },
         { .reg = 0x00009888, .val = 0x042A1800 },
         { .reg = 0x00009888, .val = 0x062AC038 },
         { .reg = 0x00009888, .val = 0x102A0000 },
         { .reg = 0x00009888, .val = 0x082AC000 },
         { .reg = 0x00009888, .val = 0x0A2A0000 },
         { .reg = 0x00009888, .val = 0x0C2A0000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x182C0002 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x042F0048 },
         { .reg = 0x00009888, .val = 0x102F0000 },
         { .reg = 0x00009888, .val = 0x1C5E000C },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x0E5F000C },
         { .reg = 0x00009888, .val = 0x025F8000 },
         { .reg = 0x00009888, .val = 0x045F8000 },
         { .reg = 0x00009888, .val = 0x1C600014 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x16612800 },
         { .reg = 0x00009888, .val = 0x02618000 },
         { .reg = 0x00009888, .val = 0x04618000 },
         { .reg = 0x00009888, .val = 0x006A1800 },
         { .reg = 0x00009888, .val = 0x026A0038 },
         { .reg = 0x00009888, .val = 0x106A0000 },
         { .reg = 0x00009888, .val = 0x1A6A00F0 },
         { .reg = 0x00009888, .val = 0x046A0000 },
         { .reg = 0x00009888, .val = 0x1A6C000A },
         { .reg = 0x00009888, .val = 0x066C8000 },
         { .reg = 0x00009888, .val = 0x086C8000 },
         { .reg = 0x00009888, .val = 0x0E6F2440 },
         { .reg = 0x00009888, .val = 0x106F0000 },
         { .reg = 0x00009888, .val = 0x1A9EC000 },
         { .reg = 0x00009888, .val = 0x1C9E0003 },
         { .reg = 0x00009888, .val = 0x0C9FC000 },
         { .reg = 0x00009888, .val = 0x0E9F0003 },
         { .reg = 0x00009888, .val = 0x1AA02A00 },
         { .reg = 0x00009888, .val = 0x1CA00001 },
         { .reg = 0x00009888, .val = 0x16A102A8 },
         { .reg = 0x00009888, .val = 0x0CAA1C30 },
         { .reg = 0x00009888, .val = 0x10AA0000 },
         { .reg = 0x00009888, .val = 0x18AAF000 },
         { .reg = 0x00009888, .val = 0x1AAA0000 },
         { .reg = 0x00009888, .val = 0x18ACAA00 },
         { .reg = 0x00009888, .val = 0x0AAF2440 },
         { .reg = 0x00009888, .val = 0x10AF0000 },
         { .reg = 0x00009888, .val = 0x10DE8000 },
         { .reg = 0x00009888, .val = 0x1ADE3800 },
         { .reg = 0x00009888, .val = 0x00DF8000 },
         { .reg = 0x00009888, .val = 0x0CDF3800 },
         { .reg = 0x00009888, .val = 0x00E04000 },
         { .reg = 0x00009888, .val = 0x0EE04000 },
         { .reg = 0x00009888, .val = 0x1AE000A0 },
         { .reg = 0x00009888, .val = 0x00E18000 },
         { .reg = 0x00009888, .val = 0x0EE18000 },
         { .reg = 0x00009888, .val = 0x14E18000 },
         { .reg = 0x00009888, .val = 0x16E10002 },
         { .reg = 0x00009888, .val = 0x08EA1C30 },
         { .reg = 0x00009888, .val = 0x10EA0000 },
         { .reg = 0x00009888, .val = 0x00EAC000 },
         { .reg = 0x00009888, .val = 0x0EEAC000 },
         { .reg = 0x00009888, .val = 0x18EA0000 },
         { .reg = 0x00009888, .val = 0x04EC8000 },
         { .reg = 0x00009888, .val = 0x18EC00A8 },
         { .reg = 0x00009888, .val = 0x00EF0040 },
         { .reg = 0x00009888, .val = 0x06EF2400 },
         { .reg = 0x00009888, .val = 0x10EF0000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0E08A000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x14090050 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x040E1000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0C0E5000 },
         { .reg = 0x00009888, .val = 0x1812FC00 },
         { .reg = 0x00009888, .val = 0x1A12002B },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x00158000 },
         { .reg = 0x00009888, .val = 0x0E158000 },
         { .reg = 0x00009888, .val = 0x16152AD0 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x02188000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x0A198000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1CA000 },
         { .reg = 0x00009888, .val = 0x0C1CA000 },
         { .reg = 0x00009888, .val = 0x121D5400 },
         { .reg = 0x00009888, .val = 0x141D0002 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F3000 },
         { .reg = 0x00009888, .val = 0x461F4030 },
         { .reg = 0x00009888, .val = 0x481F0040 },
         { .reg = 0x00009888, .val = 0x4A1F1000 },
         { .reg = 0x00009888, .val = 0x4C1F0010 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F1010 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F4040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000018 },
         { .reg = 0x00002774, .val = 0x0000FFFC },
         { .reg = 0x00002778, .val = 0x00000060 },
         { .reg = 0x0000277C, .val = 0x0000FFF3 },
         { .reg = 0x00002780, .val = 0x00000180 },
         { .reg = 0x00002784, .val = 0x0000FFCF },
         { .reg = 0x00002788, .val = 0x00000600 },
         { .reg = 0x0000278C, .val = 0x0000FF3F },
         { .reg = 0x00002790, .val = 0x00001800 },
         { .reg = 0x00002794, .val = 0x0000FCFF },
         { .reg = 0x00002798, .val = 0x00006000 },
         { .reg = 0x0000279C, .val = 0x0000F3FF },
         { .reg = 0x000027A0, .val = 0x00018000 },
         { .reg = 0x000027A4, .val = 0x0000CFFF },
         { .reg = 0x000027A8, .val = 0x00060000 },
         { .reg = 0x000027AC, .val = 0x00003FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler03_input_available__read;
         counter->name = "Slice0 Subslice3 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice3 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler03InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler04_input_available__read;
         counter->name = "Slice0 Subslice4 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice4 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler04InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler05_input_available__read;
         counter->name = "Slice0 Subslice5 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice5 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler05InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler06_input_available__read;
         counter->name = "Slice0 Subslice6 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice6 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler06InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_1__sampler07_input_available__read;
         counter->name = "Slice0 Subslice7 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice7 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler07InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler 2";
   query->symbol_name = "Sampler_2";
   query->guid = "afc0f021-8c33-4d60-803d-93487f96c7c1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

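   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function the query is allocated with rzalloc()
    * above rather than kept in a global variable; only the register tables
    * below are static const. The data_size guard that follows therefore
    * looks like a leftover from an earlier one-time-initialization scheme.
    * Confirm, and make any change in gen_perf.py, which generates this file. */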

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142D0005 },
         { .reg = 0x00009888, .val = 0x14320005 },
         { .reg = 0x00009888, .val = 0x146D0005 },
         { .reg = 0x00009888, .val = 0x14720005 },
         { .reg = 0x00009888, .val = 0x14AD0005 },
         { .reg = 0x00009888, .val = 0x14B20005 },
         { .reg = 0x00009888, .val = 0x14ED0005 },
         { .reg = 0x00009888, .val = 0x14F20005 },
         { .reg = 0x00009888, .val = 0x1C1E000C },
         { .reg = 0x00009888, .val = 0x0E1F000C },
         { .reg = 0x00009888, .val = 0x1C200014 },
         { .reg = 0x00009888, .val = 0x16212800 },
         { .reg = 0x00009888, .val = 0x22290010 },
         { .reg = 0x00009888, .val = 0x1A2A00D0 },
         { .reg = 0x00009888, .val = 0x1A2C000A },
         { .reg = 0x00009888, .val = 0x0E2D0033 },
         { .reg = 0x00009888, .val = 0x022D0000 },
         { .reg = 0x00009888, .val = 0x0E2F8000 },
         { .reg = 0x00009888, .val = 0x1E300080 },
         { .reg = 0x00009888, .val = 0x1A310008 },
         { .reg = 0x00009888, .val = 0x0E321980 },
         { .reg = 0x00009888, .val = 0x02320000 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x025F8000 },
         { .reg = 0x00009888, .val = 0x045F8000 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x02618000 },
         { .reg = 0x00009888, .val = 0x04618000 },
         { .reg = 0x00009888, .val = 0x12698000 },
         { .reg = 0x00009888, .val = 0x026A4000 },
         { .reg = 0x00009888, .val = 0x046AC000 },
         { .reg = 0x00009888, .val = 0x066C8000 },
         { .reg = 0x00009888, .val = 0x086C8000 },
         { .reg = 0x00009888, .val = 0x006D1980 },
         { .reg = 0x00009888, .val = 0x026D0000 },
         { .reg = 0x00009888, .val = 0x026F4000 },
         { .reg = 0x00009888, .val = 0x1A702000 },
         { .reg = 0x00009888, .val = 0x08718000 },
         { .reg = 0x00009888, .val = 0x02720033 },
         { .reg = 0x00009888, .val = 0x169E8000 },
         { .reg = 0x00009888, .val = 0x189E4000 },
         { .reg = 0x00009888, .val = 0x069F8000 },
         { .reg = 0x00009888, .val = 0x089F8000 },
         { .reg = 0x00009888, .val = 0x06A04000 },
         { .reg = 0x00009888, .val = 0x08A04000 },
         { .reg = 0x00009888, .val = 0x06A18000 },
         { .reg = 0x00009888, .val = 0x08A18000 },
         { .reg = 0x00009888, .val = 0x16A98000 },
         { .reg = 0x00009888, .val = 0x06AA4000 },
         { .reg = 0x00009888, .val = 0x08AAC000 },
         { .reg = 0x00009888, .val = 0x0AAC8000 },
         { .reg = 0x00009888, .val = 0x0CAC8000 },
         { .reg = 0x00009888, .val = 0x02AD1980 },
         { .reg = 0x00009888, .val = 0x04AF4000 },
         { .reg = 0x00009888, .val = 0x1CB00002 },
         { .reg = 0x00009888, .val = 0x0CB18000 },
         { .reg = 0x00009888, .val = 0x04B20033 },
         { .reg = 0x00009888, .val = 0x02B20000 },
         { .reg = 0x00009888, .val = 0x18DE8000 },
         { .reg = 0x00009888, .val = 0x1ADE0400 },
         { .reg = 0x00009888, .val = 0x0ADF8000 },
         { .reg = 0x00009888, .val = 0x0CDF0400 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x0CE04000 },
         { .reg = 0x00009888, .val = 0x0AE18000 },
         { .reg = 0x00009888, .val = 0x0CE18000 },
         { .reg = 0x00009888, .val = 0x1EE98000 },
         { .reg = 0x00009888, .val = 0x0AEA4000 },
         { .reg = 0x00009888, .val = 0x0CEAC000 },
         { .reg = 0x00009888, .val = 0x0EEC8000 },
         { .reg = 0x00009888, .val = 0x18EC0002 },
         { .reg = 0x00009888, .val = 0x04ED1980 },
         { .reg = 0x00009888, .val = 0x02ED0000 },
         { .reg = 0x00009888, .val = 0x06EF4000 },
         { .reg = 0x00009888, .val = 0x1CF00020 },
         { .reg = 0x00009888, .val = 0x18F10002 },
         { .reg = 0x00009888, .val = 0x06F20033 },
         { .reg = 0x00009888, .val = 0x02F20000 },
         { .reg = 0x00009888, .val = 0x10040140 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040055 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600C0 },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x040D8000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x080E4000 },
         { .reg = 0x00009888, .val = 0x0A0E1000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0812C000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x04144000 },
         { .reg = 0x00009888, .val = 0x1815000F },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08154000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C158000 },
         { .reg = 0x00009888, .val = 0x0E18A000 },
         { .reg = 0x00009888, .val = 0x14190028 },
         { .reg = 0x00009888, .val = 0x021C8000 },
         { .reg = 0x00009888, .val = 0x041C2000 },
         { .reg = 0x00009888, .val = 0x0A1D8000 },
         { .reg = 0x00009888, .val = 0x0C1D2000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3000 },
         { .reg = 0x00009888, .val = 0x2A1F0061 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x521F0000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2430 },
         { .reg = 0x00009888, .val = 0x501F0124 },
         { .reg = 0x00009888, .val = 0x4C1F5047 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F0020 },
         { .reg = 0x00009888, .val = 0x421F0020 },
         { .reg = 0x00009888, .val = 0x441F3050 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__sampler_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler03_output_ready__read;
         counter->name = "Slice0 Subslice3 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice3 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler03OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler04_output_ready__read;
         counter->name = "Slice0 Subslice4 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice4 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler04OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler05_output_ready__read;
         counter->name = "Slice0 Subslice5 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice5 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler05OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler06_output_ready__read;
         counter->name = "Slice0 Subslice6 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice6 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler06OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__sampler_2__sampler07_output_ready__read;
         counter->name = "Slice0 Subslice7 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice7 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler07OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__sampler_2__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "eddc2f32-b196-4a72-9bf8-21770e35f8bd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 51);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14290000 },
         { .reg = 0x00009888, .val = 0x142E0000 },
         { .reg = 0x00009888, .val = 0x14690000 },
         { .reg = 0x00009888, .val = 0x146E0000 },
         { .reg = 0x00009888, .val = 0x14A90000 },
         { .reg = 0x00009888, .val = 0x14AE0000 },
         { .reg = 0x00009888, .val = 0x14E90000 },
         { .reg = 0x00009888, .val = 0x14EE0000 },
         { .reg = 0x00009888, .val = 0x161E8000 },
         { .reg = 0x00009888, .val = 0x181EC000 },
         { .reg = 0x00009888, .val = 0x1A1E0400 },
         { .reg = 0x00009888, .val = 0x061F8000 },
         { .reg = 0x00009888, .val = 0x081F8000 },
         { .reg = 0x00009888, .val = 0x0A1F8000 },
         { .reg = 0x00009888, .val = 0x0C1F0400 },
         { .reg = 0x00009888, .val = 0x06204000 },
         { .reg = 0x00009888, .val = 0x08204000 },
         { .reg = 0x00009888, .val = 0x0A204000 },
         { .reg = 0x00009888, .val = 0x0C204000 },
         { .reg = 0x00009888, .val = 0x06218000 },
         { .reg = 0x00009888, .val = 0x08218000 },
         { .reg = 0x00009888, .val = 0x0A218000 },
         { .reg = 0x00009888, .val = 0x0C218000 },
         { .reg = 0x00009888, .val = 0x0429C300 },
         { .reg = 0x00009888, .val = 0x062900C5 },
         { .reg = 0x00009888, .val = 0x22290000 },
         { .reg = 0x00009888, .val = 0x1E290000 },
         { .reg = 0x00009888, .val = 0x20290000 },
         { .reg = 0x00009888, .val = 0x062AC000 },
         { .reg = 0x00009888, .val = 0x082AC000 },
         { .reg = 0x00009888, .val = 0x0A2A4000 },
         { .reg = 0x00009888, .val = 0x0C2A4000 },
         { .reg = 0x00009888, .val = 0x0A2C8000 },
         { .reg = 0x00009888, .val = 0x0C2C8000 },
         { .reg = 0x00009888, .val = 0x0E2C8000 },
         { .reg = 0x00009888, .val = 0x182C0002 },
         { .reg = 0x00009888, .val = 0x022EC300 },
         { .reg = 0x00009888, .val = 0x042E00C5 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x022F8000 },
         { .reg = 0x00009888, .val = 0x042F4000 },
         { .reg = 0x00009888, .val = 0x1A304000 },
         { .reg = 0x00009888, .val = 0x1C300001 },
         { .reg = 0x00009888, .val = 0x1C5E000C },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x0E5F000C },
         { .reg = 0x00009888, .val = 0x025F8000 },
         { .reg = 0x00009888, .val = 0x045F8000 },
         { .reg = 0x00009888, .val = 0x1C600014 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x16612800 },
         { .reg = 0x00009888, .val = 0x02618000 },
         { .reg = 0x00009888, .val = 0x04618000 },
         { .reg = 0x00009888, .val = 0x0069C300 },
         { .reg = 0x00009888, .val = 0x026900C5 },
         { .reg = 0x00009888, .val = 0x22690000 },
         { .reg = 0x00009888, .val = 0x12690000 },
         { .reg = 0x00009888, .val = 0x1A6A00F0 },
         { .reg = 0x00009888, .val = 0x026A4000 },
         { .reg = 0x00009888, .val = 0x046A4000 },
         { .reg = 0x00009888, .val = 0x1A6C000A },
         { .reg = 0x00009888, .val = 0x066C8000 },
         { .reg = 0x00009888, .val = 0x086C8000 },
         { .reg = 0x00009888, .val = 0x0E6EC5C3 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x0E6FC000 },
         { .reg = 0x00009888, .val = 0x1E700050 },
         { .reg = 0x00009888, .val = 0x1A9EC000 },
         { .reg = 0x00009888, .val = 0x1C9E0003 },
         { .reg = 0x00009888, .val = 0x0C9FC000 },
         { .reg = 0x00009888, .val = 0x0E9F0003 },
         { .reg = 0x00009888, .val = 0x1AA02A00 },
         { .reg = 0x00009888, .val = 0x1CA00001 },
         { .reg = 0x00009888, .val = 0x16A102A8 },
         { .reg = 0x00009888, .val = 0x0CA9C5C3 },
         { .reg = 0x00009888, .val = 0x22A90000 },
         { .reg = 0x00009888, .val = 0x18AAF000 },
         { .reg = 0x00009888, .val = 0x1AAA0005 },
         { .reg = 0x00009888, .val = 0x18ACAA00 },
         { .reg = 0x00009888, .val = 0x0AAEC5C3 },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x0AAFC000 },
         { .reg = 0x00009888, .val = 0x1CB05000 },
         { .reg = 0x00009888, .val = 0x10DE8000 },
         { .reg = 0x00009888, .val = 0x1ADE3800 },
         { .reg = 0x00009888, .val = 0x00DF8000 },
         { .reg = 0x00009888, .val = 0x0CDF3800 },
         { .reg = 0x00009888, .val = 0x00E04000 },
         { .reg = 0x00009888, .val = 0x0EE04000 },
         { .reg = 0x00009888, .val = 0x1AE000A0 },
         { .reg = 0x00009888, .val = 0x00E18000 },
         { .reg = 0x00009888, .val = 0x0EE18000 },
         { .reg = 0x00009888, .val = 0x14E18000 },
         { .reg = 0x00009888, .val = 0x16E10002 },
         { .reg = 0x00009888, .val = 0x08E9C5C3 },
         { .reg = 0x00009888, .val = 0x22E90000 },
         { .reg = 0x00009888, .val = 0x20E90000 },
         { .reg = 0x00009888, .val = 0x00EAC000 },
         { .reg = 0x00009888, .val = 0x0EEAC000 },
         { .reg = 0x00009888, .val = 0x18EA0500 },
         { .reg = 0x00009888, .val = 0x04EC8000 },
         { .reg = 0x00009888, .val = 0x18EC00A8 },
         { .reg = 0x00009888, .val = 0x00EE00C3 },
         { .reg = 0x00009888, .val = 0x06EEC500 },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x00EF4000 },
         { .reg = 0x00009888, .val = 0x06EF8000 },
         { .reg = 0x00009888, .val = 0x18F04000 },
         { .reg = 0x00009888, .val = 0x1CF00040 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0E08A000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x14090050 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x040E1000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0C0E5000 },
         { .reg = 0x00009888, .val = 0x1812FC00 },
         { .reg = 0x00009888, .val = 0x1A12002B },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x00158000 },
         { .reg = 0x00009888, .val = 0x0E158000 },
         { .reg = 0x00009888, .val = 0x16152AD0 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x02188000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x0A198000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1CA000 },
         { .reg = 0x00009888, .val = 0x0C1CA000 },
         { .reg = 0x00009888, .val = 0x121D5400 },
         { .reg = 0x00009888, .val = 0x141D0002 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F3000 },
         { .reg = 0x00009888, .val = 0x461F5030 },
         { .reg = 0x00009888, .val = 0x481F0050 },
         { .reg = 0x00009888, .val = 0x4A1F2000 },
         { .reg = 0x00009888, .val = 0x4C1F0020 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F2020 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F5050 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000018 },
         { .reg = 0x00002774, .val = 0x0000FFFC },
         { .reg = 0x00002778, .val = 0x00000060 },
         { .reg = 0x0000277C, .val = 0x0000FFF3 },
         { .reg = 0x00002780, .val = 0x00000180 },
         { .reg = 0x00002784, .val = 0x0000FFCF },
         { .reg = 0x00002788, .val = 0x00000600 },
         { .reg = 0x0000278C, .val = 0x0000FF3F },
         { .reg = 0x00002790, .val = 0x00001800 },
         { .reg = 0x00002794, .val = 0x0000FCFF },
         { .reg = 0x00002798, .val = 0x00006000 },
         { .reg = 0x0000279C, .val = 0x0000F3FF },
         { .reg = 0x000027A0, .val = 0x00018000 },
         { .reg = 0x000027A4, .val = 0x0000CFFF },
         { .reg = 0x000027A8, .val = 0x00060000 },
         { .reg = 0x000027AC, .val = 0x00003FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_1__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 304;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 308;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 312;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread03_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 316;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread04_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 320;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread05_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 324;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread06_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread06ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 328;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_1__ps_thread07_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread07ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 332;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "c6d3af7b-037b-4656-95e1-4f838f0a2c14";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14290000 },
         { .reg = 0x00009888, .val = 0x142E0000 },
         { .reg = 0x00009888, .val = 0x14690000 },
         { .reg = 0x00009888, .val = 0x146E0000 },
         { .reg = 0x00009888, .val = 0x14A90000 },
         { .reg = 0x00009888, .val = 0x14AE0000 },
         { .reg = 0x00009888, .val = 0x14E90000 },
         { .reg = 0x00009888, .val = 0x14EE0000 },
         { .reg = 0x00009888, .val = 0x1C1E0006 },
         { .reg = 0x00009888, .val = 0x0E1F0006 },
         { .reg = 0x00009888, .val = 0x1C200005 },
         { .reg = 0x00009888, .val = 0x16210A00 },
         { .reg = 0x00009888, .val = 0x0C29C100 },
         { .reg = 0x00009888, .val = 0x22290000 },
         { .reg = 0x00009888, .val = 0x1A2A0034 },
         { .reg = 0x00009888, .val = 0x182C8000 },
         { .reg = 0x00009888, .val = 0x1A2C0002 },
         { .reg = 0x00009888, .val = 0x0E2E00C1 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x0E2F4000 },
         { .reg = 0x00009888, .val = 0x1E300010 },
         { .reg = 0x00009888, .val = 0x1C5E0008 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x0E5F0008 },
         { .reg = 0x00009888, .val = 0x025F8000 },
         { .reg = 0x00009888, .val = 0x1C600010 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x16612000 },
         { .reg = 0x00009888, .val = 0x02618000 },
         { .reg = 0x00009888, .val = 0x0E69C100 },
         { .reg = 0x00009888, .val = 0x22690000 },
         { .reg = 0x00009888, .val = 0x1A6A0040 },
         { .reg = 0x00009888, .val = 0x026AC000 },
         { .reg = 0x00009888, .val = 0x1A6C0008 },
         { .reg = 0x00009888, .val = 0x066C8000 },
         { .reg = 0x00009888, .val = 0x006EC100 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x006F8000 },
         { .reg = 0x00009888, .val = 0x1A700400 },
         { .reg = 0x00009888, .val = 0x149E8000 },
         { .reg = 0x00009888, .val = 0x169E8000 },
         { .reg = 0x00009888, .val = 0x049F8000 },
         { .reg = 0x00009888, .val = 0x069F8000 },
         { .reg = 0x00009888, .val = 0x04A04000 },
         { .reg = 0x00009888, .val = 0x06A04000 },
         { .reg = 0x00009888, .val = 0x04A18000 },
         { .reg = 0x00009888, .val = 0x06A18000 },
         { .reg = 0x00009888, .val = 0x02A900C1 },
         { .reg = 0x00009888, .val = 0x22A90000 },
         { .reg = 0x00009888, .val = 0x04AA4000 },
         { .reg = 0x00009888, .val = 0x06AAC000 },
         { .reg = 0x00009888, .val = 0x08AC8000 },
         { .reg = 0x00009888, .val = 0x0AAC8000 },
         { .reg = 0x00009888, .val = 0x02AEC100 },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x02AF8000 },
         { .reg = 0x00009888, .val = 0x1AB04000 },
         { .reg = 0x00009888, .val = 0x18DEC000 },
         { .reg = 0x00009888, .val = 0x08DF8000 },
         { .reg = 0x00009888, .val = 0x0ADF8000 },
         { .reg = 0x00009888, .val = 0x08E04000 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x08E18000 },
         { .reg = 0x00009888, .val = 0x0AE18000 },
         { .reg = 0x00009888, .val = 0x04E900C1 },
         { .reg = 0x00009888, .val = 0x22E90000 },
         { .reg = 0x00009888, .val = 0x1CE90000 },
         { .reg = 0x00009888, .val = 0x08EA4000 },
         { .reg = 0x00009888, .val = 0x0AEAC000 },
         { .reg = 0x00009888, .val = 0x0CEC8000 },
         { .reg = 0x00009888, .val = 0x0EEC8000 },
         { .reg = 0x00009888, .val = 0x04EEC100 },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x04EF8000 },
         { .reg = 0x00009888, .val = 0x1CF00004 },
         { .reg = 0x00009888, .val = 0x10040150 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040015 },
         { .reg = 0x00009888, .val = 0x14050054 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x260600E0 },
         { .reg = 0x00009888, .val = 0x24063E00 },
         { .reg = 0x00009888, .val = 0x0E088000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x14090040 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x1A120020 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x0E148000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x16156000 },
         { .reg = 0x00009888, .val = 0x18150007 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C188000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x021CA000 },
         { .reg = 0x00009888, .val = 0x0A1DA000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3061 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x521F4000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2420 },
         { .reg = 0x00009888, .val = 0x501F0024 },
         { .reg = 0x00009888, .val = 0x4C1F3050 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F2000 },
         { .reg = 0x00009888, .val = 0x421F5000 },
         { .reg = 0x00009888, .val = 0x441F3730 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread03_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread04_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread05_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread06_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread06ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_2__non_ps_thread07_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread07ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_2__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_tdl_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_3";
   query->symbol_name = "TDL_3";
   query->guid = "fd25ec19-3ed1-40c9-8648-1d2387449a92";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 51);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

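   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so a query only needs to be described
    * once. The query is zero-allocated by rzalloc() above and data_size is
    * not written before this point, so the check below always passes and
    * the description is filled in on every call of this function... */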

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14292C00 },
         { .reg = 0x00009888, .val = 0x16290013 },
         { .reg = 0x00009888, .val = 0x142E2C00 },
         { .reg = 0x00009888, .val = 0x162E0013 },
         { .reg = 0x00009888, .val = 0x14692C00 },
         { .reg = 0x00009888, .val = 0x16690013 },
         { .reg = 0x00009888, .val = 0x146E2C00 },
         { .reg = 0x00009888, .val = 0x166E0013 },
         { .reg = 0x00009888, .val = 0x14A92C00 },
         { .reg = 0x00009888, .val = 0x16A90013 },
         { .reg = 0x00009888, .val = 0x14AE2C00 },
         { .reg = 0x00009888, .val = 0x16AE0013 },
         { .reg = 0x00009888, .val = 0x14E92C00 },
         { .reg = 0x00009888, .val = 0x16E90013 },
         { .reg = 0x00009888, .val = 0x14EE2C00 },
         { .reg = 0x00009888, .val = 0x16EE0013 },
         { .reg = 0x00009888, .val = 0x101E8000 },
         { .reg = 0x00009888, .val = 0x1A1E3800 },
         { .reg = 0x00009888, .val = 0x001F8000 },
         { .reg = 0x00009888, .val = 0x0C1F3800 },
         { .reg = 0x00009888, .val = 0x00204000 },
         { .reg = 0x00009888, .val = 0x0E204000 },
         { .reg = 0x00009888, .val = 0x1A2000A0 },
         { .reg = 0x00009888, .val = 0x00218000 },
         { .reg = 0x00009888, .val = 0x0E218000 },
         { .reg = 0x00009888, .val = 0x14218000 },
         { .reg = 0x00009888, .val = 0x16210002 },
         { .reg = 0x00009888, .val = 0x002900C7 },
         { .reg = 0x00009888, .val = 0x0629CF00 },
         { .reg = 0x00009888, .val = 0x22290000 },
         { .reg = 0x00009888, .val = 0x10290000 },
         { .reg = 0x00009888, .val = 0x20290000 },
         { .reg = 0x00009888, .val = 0x002A4000 },
         { .reg = 0x00009888, .val = 0x0E2A4000 },
         { .reg = 0x00009888, .val = 0x182A0F00 },
         { .reg = 0x00009888, .val = 0x042C8000 },
         { .reg = 0x00009888, .val = 0x182C00A8 },
         { .reg = 0x00009888, .val = 0x082ECFC7 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x082FC000 },
         { .reg = 0x00009888, .val = 0x1C300500 },
         { .reg = 0x00009888, .val = 0x1A5EC000 },
         { .reg = 0x00009888, .val = 0x1C5E0003 },
         { .reg = 0x00009888, .val = 0x0C5FC000 },
         { .reg = 0x00009888, .val = 0x0E5F0003 },
         { .reg = 0x00009888, .val = 0x1A602A00 },
         { .reg = 0x00009888, .val = 0x1C600001 },
         { .reg = 0x00009888, .val = 0x166102A8 },
         { .reg = 0x00009888, .val = 0x0A69CFC7 },
         { .reg = 0x00009888, .val = 0x22690000 },
         { .reg = 0x00009888, .val = 0x186A5000 },
         { .reg = 0x00009888, .val = 0x1A6A000F },
         { .reg = 0x00009888, .val = 0x186CAA00 },
         { .reg = 0x00009888, .val = 0x0C6ECFC7 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x0C6FC000 },
         { .reg = 0x00009888, .val = 0x1E700005 },
         { .reg = 0x00009888, .val = 0x1C9E000C },
         { .reg = 0x00009888, .val = 0x129E8000 },
         { .reg = 0x00009888, .val = 0x149E8000 },
         { .reg = 0x00009888, .val = 0x0E9F000C },
         { .reg = 0x00009888, .val = 0x029F8000 },
         { .reg = 0x00009888, .val = 0x049F8000 },
         { .reg = 0x00009888, .val = 0x1CA00014 },
         { .reg = 0x00009888, .val = 0x02A04000 },
         { .reg = 0x00009888, .val = 0x04A04000 },
         { .reg = 0x00009888, .val = 0x16A12800 },
         { .reg = 0x00009888, .val = 0x02A18000 },
         { .reg = 0x00009888, .val = 0x04A18000 },
         { .reg = 0x00009888, .val = 0x0EA9CFC7 },
         { .reg = 0x00009888, .val = 0x22A90000 },
         { .reg = 0x00009888, .val = 0x1AAA0050 },
         { .reg = 0x00009888, .val = 0x02AAC000 },
         { .reg = 0x00009888, .val = 0x04AAC000 },
         { .reg = 0x00009888, .val = 0x1AAC000A },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x08AC8000 },
         { .reg = 0x00009888, .val = 0x00AEC700 },
         { .reg = 0x00009888, .val = 0x02AE00CF },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x00AF8000 },
         { .reg = 0x00009888, .val = 0x02AF4000 },
         { .reg = 0x00009888, .val = 0x1AB01400 },
         { .reg = 0x00009888, .val = 0x16DE8000 },
         { .reg = 0x00009888, .val = 0x18DEC000 },
         { .reg = 0x00009888, .val = 0x1ADE0400 },
         { .reg = 0x00009888, .val = 0x06DF8000 },
         { .reg = 0x00009888, .val = 0x08DF8000 },
         { .reg = 0x00009888, .val = 0x0ADF8000 },
         { .reg = 0x00009888, .val = 0x0CDF0400 },
         { .reg = 0x00009888, .val = 0x06E04000 },
         { .reg = 0x00009888, .val = 0x08E04000 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x0CE04000 },
         { .reg = 0x00009888, .val = 0x06E18000 },
         { .reg = 0x00009888, .val = 0x08E18000 },
         { .reg = 0x00009888, .val = 0x0AE18000 },
         { .reg = 0x00009888, .val = 0x0CE18000 },
         { .reg = 0x00009888, .val = 0x02E9C700 },
         { .reg = 0x00009888, .val = 0x04E900CF },
         { .reg = 0x00009888, .val = 0x22E90000 },
         { .reg = 0x00009888, .val = 0x1CE90000 },
         { .reg = 0x00009888, .val = 0x06EA4000 },
         { .reg = 0x00009888, .val = 0x08EA4000 },
         { .reg = 0x00009888, .val = 0x0AEAC000 },
         { .reg = 0x00009888, .val = 0x0CEAC000 },
         { .reg = 0x00009888, .val = 0x0AEC8000 },
         { .reg = 0x00009888, .val = 0x0CEC8000 },
         { .reg = 0x00009888, .val = 0x0EEC8000 },
         { .reg = 0x00009888, .val = 0x18EC0002 },
         { .reg = 0x00009888, .val = 0x04EEC700 },
         { .reg = 0x00009888, .val = 0x06EE00CF },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x04EF8000 },
         { .reg = 0x00009888, .val = 0x06EF4000 },
         { .reg = 0x00009888, .val = 0x1CF00014 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0A08A000 },
         { .reg = 0x00009888, .val = 0x0C08A000 },
         { .reg = 0x00009888, .val = 0x0E095000 },
         { .reg = 0x00009888, .val = 0x14090005 },
         { .reg = 0x00009888, .val = 0x020D8000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x060E4000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x0A0E1000 },
         { .reg = 0x00009888, .val = 0x1812A800 },
         { .reg = 0x00009888, .val = 0x1A12003E },
         { .reg = 0x00009888, .val = 0x0212C000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x0015C000 },
         { .reg = 0x00009888, .val = 0x0E15C000 },
         { .reg = 0x00009888, .val = 0x16152AF8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06158000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C158000 },
         { .reg = 0x00009888, .val = 0x00182000 },
         { .reg = 0x00009888, .val = 0x06188000 },
         { .reg = 0x00009888, .val = 0x0818A000 },
         { .reg = 0x00009888, .val = 0x08192000 },
         { .reg = 0x00009888, .val = 0x0E198000 },
         { .reg = 0x00009888, .val = 0x12190140 },
         { .reg = 0x00009888, .val = 0x0E1CA000 },
         { .reg = 0x00009888, .val = 0x001C8000 },
         { .reg = 0x00009888, .val = 0x021C2000 },
         { .reg = 0x00009888, .val = 0x141D0028 },
         { .reg = 0x00009888, .val = 0x081D8000 },
         { .reg = 0x00009888, .val = 0x0A1D2000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2420 },
         { .reg = 0x00009888, .val = 0x3E1F5000 },
         { .reg = 0x00009888, .val = 0x461F3050 },
         { .reg = 0x00009888, .val = 0x481F2030 },
         { .reg = 0x00009888, .val = 0x4A1F0020 },
         { .reg = 0x00009888, .val = 0x4C1F2000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F5050 },
         { .reg = 0x00009888, .val = 0x441F3030 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__tdl_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__tdl_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__tdl_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header03_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header03_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header04_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header04_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header05_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header05_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header06_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader06ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header06_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader06ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 292;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header07_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader07ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 296;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = icl__tdl_3__thread_header07_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader07ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 300;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "40dc79f2-88c8-47c6-8f86-f509e39fbe5d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 11);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x0E061200 },
         { .reg = 0x00009888, .val = 0x22062400 },
         { .reg = 0x00009888, .val = 0x10002400 },
         { .reg = 0x00009888, .val = 0x00038000 },
         { .reg = 0x00009888, .val = 0x06032000 },
         { .reg = 0x00009888, .val = 0x020600F3 },
         { .reg = 0x00009888, .val = 0x0C060043 },
         { .reg = 0x00009888, .val = 0x20060000 },
         { .reg = 0x00009888, .val = 0x02000023 },
         { .reg = 0x00009888, .val = 0x20002000 },
         { .reg = 0x00009888, .val = 0x081A8000 },
         { .reg = 0x00009888, .val = 0x3A1F6000 },
         { .reg = 0x00009888, .val = 0x4E1F2900 },
         { .reg = 0x00009888, .val = 0x501F00E9 },
         { .reg = 0x00009888, .val = 0x10204000 },
         { .reg = 0x00009888, .val = 0x0C214000 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x401F4131 },
         { .reg = 0x00009888, .val = 0x421F1021 },
         { .reg = 0x00009888, .val = 0x441F3100 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009884, .val = 0x00000002 },
         { .reg = 0x00009888, .val = 0x181B2400 },
         { .reg = 0x00009888, .val = 0x185B2400 },
         { .reg = 0x00009888, .val = 0x12240120 },
         { .reg = 0x00009888, .val = 0x06218000 },
         { .reg = 0x00009888, .val = 0x08212000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x180B8000 },
         { .reg = 0x00009888, .val = 0x0A0C8000 },
         { .reg = 0x00009888, .val = 0x040F8000 },
         { .reg = 0x00009888, .val = 0x0E080010 },
         { .reg = 0x00009888, .val = 0x041B8300 },
         { .reg = 0x00009888, .val = 0x101B0000 },
         { .reg = 0x00009888, .val = 0x221B0000 },
         { .reg = 0x00009888, .val = 0x041E4000 },
         { .reg = 0x00009888, .val = 0x084A8000 },
         { .reg = 0x00009888, .val = 0x184B4000 },
         { .reg = 0x00009888, .val = 0x084C8000 },
         { .reg = 0x00009888, .val = 0x044F2000 },
         { .reg = 0x00009888, .val = 0x0E480004 },
         { .reg = 0x00009888, .val = 0x045B0083 },
         { .reg = 0x00009888, .val = 0x105B0000 },
         { .reg = 0x00009888, .val = 0x225B0000 },
         { .reg = 0x00009888, .val = 0x045E1000 },
         { .reg = 0x00009888, .val = 0x0823A000 },
         { .reg = 0x00009888, .val = 0x02242980 },
         { .reg = 0x00009888, .val = 0x10240000 },
         { .reg = 0x00009888, .val = 0x06240000 },
         { .reg = 0x00009888, .val = 0x0A244000 },
         { .reg = 0x00009888, .val = 0x1C250004 },
         { .reg = 0x00009888, .val = 0x08268000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = icl__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 52;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
icl_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TestOa";
   query->symbol_name = "TestOa";
   query->guid = "e617cfd5-6cc5-4143-8994-cd36cebb921f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x16130000 },
         { .reg = 0x00009888, .val = 0x24000001 },
         { .reg = 0x00009888, .val = 0x0E130056 },
         { .reg = 0x00009888, .val = 0x10130000 },
         { .reg = 0x00009888, .val = 0x1A130000 },
         { .reg = 0x00009888, .val = 0x541F0001 },
         { .reg = 0x00009888, .val = 0x181F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x301F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x0000FFFF },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x0000FFFF },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x0000FFFF },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = icl__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = icl__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Describe the "AsyncCompute" OA metric set and register it with \p perf.
 *
 * A new intel_perf_query_info is allocated through rzalloc() with \p perf
 * as the context argument, its register programming and its 21 counters are
 * filled in, and the result is inserted into perf->oa_metrics_table keyed
 * by the query's GUID string. Unlike some sibling metric sets, no mux
 * register table is installed here.
 *
 * NOTE(review): the rzalloc()/rzalloc_array() results are dereferenced
 * without a NULL check.
 */
static void
icl_register_async_compute_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They have static storage duration and are
    * referenced by pointer from the query config below. */
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };
   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00001000 },
      { .reg = 0x0000E658, .val = 0x00051050 },
      { .reg = 0x0000E758, .val = 0x00011010 },
      { .reg = 0x0000E45C, .val = 0x00061060 },
      { .reg = 0x0000E55C, .val = 0x00000008 },
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->name = "AsyncCompute";
   info->symbol_name = "AsyncCompute";
   info->guid = "eca4858e-27a4-4bcf-a57f-2616e2f08950";

   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Counter storage: room for exactly the 21 counters described below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 21);
   info->n_counters = 0;

   /* Accumulation buffer offsets; each group starts where the previous one
    * ends. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   /* The description is only filled in while data_size is still zero.
    * NOTE(review): presumably rzalloc() returns zeroed storage, in which
    * case this guard always passes for the freshly allocated query --
    * confirm against util/ralloc.h. */
   if (info->data_size == 0) {
      struct intel_perf_query_counter *ctr;

      info->config.b_counter_regs = b_counter_regs;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      info->config.flex_regs = flex_regs;
      info->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* GpuTime */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__gpu_time__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__gpu_core_clocks__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: the only counter here whose maximum is
       * computed from perf at registration time. */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__avg_gpu_core_frequency__read;
      ctr->raw_max = icl__async_compute__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      /* GpuBusy */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__gpu_busy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* VsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__vs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      /* HsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__hs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      /* DsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__ds_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      /* GsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__gs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      /* PsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__ps_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      /* CsThreads */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->oa_counter_read_uint64 = icl__async_compute__cs_threads__read;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* Fpu0Active -- from here on the counters are floats placed 4 bytes
       * apart. */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Fpu0Active";
      ctr->name = "EU FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 80;

      /* VsFpu0Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__vs_fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 84;

      /* PsFpu0Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__ps_fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 88;

      /* CsFpu0Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "CsFpu0Active";
      ctr->name = "CS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__cs_fpu0_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 92;

      /* Fpu1Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Fpu1Active";
      ctr->name = "EU FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 96;

      /* VsFpu1Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__vs_fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 100;

      /* PsFpu1Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__ps_fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 104;

      /* CsFpu1Active */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "CsFpu1Active";
      ctr->name = "CS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__cs_fpu1_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 108;

      /* EuThreadOccupancy */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__eu_thread_occupancy__read;
      ctr->raw_max = 100.0;
      ctr->offset = 112;

      /* EuActive */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__eu_active__read;
      ctr->raw_max = 100.0;
      ctr->offset = 116;

      /* EuStall */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->oa_counter_read_float = icl__async_compute__eu_stall__read;
      ctr->raw_max = 100.0;
      ctr->offset = 120;

      /* The result buffer ends right after the last counter described. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

void
intel_oa_register_queries_icl(struct intel_perf_config *perf)
{
   icl_register_render_basic_counter_query(perf);
   icl_register_compute_basic_counter_query(perf);
   icl_register_compute_extended_counter_query(perf);
   icl_register_compute_l3_cache_counter_query(perf);
   icl_register_render_pipe_profile_counter_query(perf);
   icl_register_hdc_and_sf_counter_query(perf);
   icl_register_rasterizer_and_pixel_backend_counter_query(perf);
   icl_register_l3_1_counter_query(perf);
   icl_register_l3_2_counter_query(perf);
   icl_register_l3_3_counter_query(perf);
   icl_register_l3_4_counter_query(perf);
   icl_register_l3_5_counter_query(perf);
   icl_register_sampler_1_counter_query(perf);
   icl_register_sampler_2_counter_query(perf);
   icl_register_tdl_1_counter_query(perf);
   icl_register_tdl_2_counter_query(perf);
   icl_register_tdl_3_counter_query(perf);
   icl_register_gpu_busyness_counter_query(perf);
   icl_register_test_oa_counter_query(perf);
   icl_register_async_compute_counter_query(perf);
}


static void
ehl_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "c693e665-867f-4362-91b6-85337f932010";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142C0014 },
         { .reg = 0x00009888, .val = 0x14120700 },
         { .reg = 0x00009888, .val = 0x121500E0 },
         { .reg = 0x00009888, .val = 0x1E1E0030 },
         { .reg = 0x00009888, .val = 0x1A204000 },
         { .reg = 0x00009888, .val = 0x1C200001 },
         { .reg = 0x00009888, .val = 0x1E213000 },
         { .reg = 0x00009888, .val = 0x0E2C1831 },
         { .reg = 0x00009888, .val = 0x102C0000 },
         { .reg = 0x00009888, .val = 0x182C0000 },
         { .reg = 0x00009888, .val = 0x10040140 },
         { .reg = 0x00009888, .val = 0x0E040005 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x06054000 },
         { .reg = 0x00009888, .val = 0x08051000 },
         { .reg = 0x00009888, .val = 0x260600C0 },
         { .reg = 0x00009888, .val = 0x24061800 },
         { .reg = 0x00009888, .val = 0x04120023 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x08120000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x04144000 },
         { .reg = 0x00009888, .val = 0x02150980 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x1815000F },
         { .reg = 0x00009888, .val = 0x06150000 },
         { .reg = 0x00009888, .val = 0x08154000 },
         { .reg = 0x00009888, .val = 0x0E18A000 },
         { .reg = 0x00009888, .val = 0x14190028 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1A1C01C0 },
         { .reg = 0x00009888, .val = 0x1C1C000A },
         { .reg = 0x00009888, .val = 0x1A5C01C0 },
         { .reg = 0x00009888, .val = 0x1C5C000A },
         { .reg = 0x00009888, .val = 0x001C0097 },
         { .reg = 0x00009888, .val = 0x0A1CA700 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x081D0100 },
         { .reg = 0x00009888, .val = 0x0A1D0008 },
         { .reg = 0x00009888, .val = 0x085C0097 },
         { .reg = 0x00009888, .val = 0x0A5C00A7 },
         { .reg = 0x00009888, .val = 0x105C0000 },
         { .reg = 0x00009888, .val = 0x0A5D0005 },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1E1EB300 },
         { .reg = 0x00009888, .val = 0x301F4000 },
         { .reg = 0x00009888, .val = 0x501F2004 },
         { .reg = 0x00009888, .val = 0x521F0090 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x4E1F2000 },
         { .reg = 0x00009888, .val = 0x3E1F6100 },
         { .reg = 0x00009888, .val = 0x461F6100 },
         { .reg = 0x00009888, .val = 0x481F6100 },
         { .reg = 0x00009888, .val = 0x4A1F0061 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x421F3060 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00051050 },
         { .reg = 0x0000E45C, .val = 0x00000052 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_basic__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 9) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler00 Bottleneck";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__sampler_l1_misses__read;
      counter->name = "Sampler Cache Misses";
      counter->desc = "The total number of sampler cache misses in all LODs in all sampler units. Unit: messages.";
      counter->symbol_name = "SamplerL1Misses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      if (perf->sys_vars.subslice_mask & 9) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "ae4d01a9-fe55-453c-8c7a-036acd5eacff";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x12230012 },
         { .reg = 0x00009888, .val = 0x10230019 },
         { .reg = 0x00009888, .val = 0x12A30012 },
         { .reg = 0x00009888, .val = 0x10A30019 },
         { .reg = 0x00009888, .val = 0x1A200080 },
         { .reg = 0x00009888, .val = 0x1C200002 },
         { .reg = 0x00009888, .val = 0x04208000 },
         { .reg = 0x00009888, .val = 0x08208000 },
         { .reg = 0x00009888, .val = 0x0C208000 },
         { .reg = 0x00009888, .val = 0x0A230031 },
         { .reg = 0x00009888, .val = 0x0E231E00 },
         { .reg = 0x00009888, .val = 0x0223003D },
         { .reg = 0x00009888, .val = 0x04230032 },
         { .reg = 0x00009888, .val = 0x06230033 },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x1AA08200 },
         { .reg = 0x00009888, .val = 0x02A08000 },
         { .reg = 0x00009888, .val = 0x06A08000 },
         { .reg = 0x00009888, .val = 0x0AA08000 },
         { .reg = 0x00009888, .val = 0x0AA31880 },
         { .reg = 0x00009888, .val = 0x0EA3003C },
         { .reg = 0x00009888, .val = 0x00A31E80 },
         { .reg = 0x00009888, .val = 0x02A31900 },
         { .reg = 0x00009888, .val = 0x04A31980 },
         { .reg = 0x00009888, .val = 0x0E044055 },
         { .reg = 0x00009888, .val = 0x10040141 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600CC },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x18123000 },
         { .reg = 0x00009888, .val = 0x1A12000C },
         { .reg = 0x00009888, .val = 0x0212C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A12C000 },
         { .reg = 0x00009888, .val = 0x08138000 },
         { .reg = 0x00009888, .val = 0x0A134000 },
         { .reg = 0x00009888, .val = 0x0413A000 },
         { .reg = 0x00009888, .val = 0x06132000 },
         { .reg = 0x00009888, .val = 0x0A148000 },
         { .reg = 0x00009888, .val = 0x0E144000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x04148000 },
         { .reg = 0x00009888, .val = 0x16150380 },
         { .reg = 0x00009888, .val = 0x1815000D },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x0415C000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A154000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x0A182000 },
         { .reg = 0x00009888, .val = 0x0E188000 },
         { .reg = 0x00009888, .val = 0x02182000 },
         { .reg = 0x00009888, .val = 0x04182000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x12190400 },
         { .reg = 0x00009888, .val = 0x14190020 },
         { .reg = 0x00009888, .val = 0x0A192000 },
         { .reg = 0x00009888, .val = 0x0C192000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1C8000 },
         { .reg = 0x00009888, .val = 0x0E1C2000 },
         { .reg = 0x00009888, .val = 0x001C8000 },
         { .reg = 0x00009888, .val = 0x021C8000 },
         { .reg = 0x00009888, .val = 0x041C8000 },
         { .reg = 0x00009888, .val = 0x121D1000 },
         { .reg = 0x00009888, .val = 0x141D0008 },
         { .reg = 0x00009888, .val = 0x081D8000 },
         { .reg = 0x00009888, .val = 0x0A1D8000 },
         { .reg = 0x00009888, .val = 0x0C1D8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1A1C01C0 },
         { .reg = 0x00009888, .val = 0x1C1C000A },
         { .reg = 0x00009888, .val = 0x1A5C01C0 },
         { .reg = 0x00009888, .val = 0x1C5C000A },
         { .reg = 0x00009888, .val = 0x001C0097 },
         { .reg = 0x00009888, .val = 0x0C1CA700 },
         { .reg = 0x00009888, .val = 0x101C0000 },
         { .reg = 0x00009888, .val = 0x081D0100 },
         { .reg = 0x00009888, .val = 0x0A1D0020 },
         { .reg = 0x00009888, .val = 0x085C0097 },
         { .reg = 0x00009888, .val = 0x0C5C00A7 },
         { .reg = 0x00009888, .val = 0x105C0000 },
         { .reg = 0x00009888, .val = 0x0A5D0011 },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1E1E0300 },
         { .reg = 0x00009888, .val = 0x201E000B },
         { .reg = 0x00009888, .val = 0x301F4000 },
         { .reg = 0x00009888, .val = 0x501F2124 },
         { .reg = 0x00009888, .val = 0x521F2520 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2430 },
         { .reg = 0x00009888, .val = 0x3E1F0300 },
         { .reg = 0x00009888, .val = 0x461F0300 },
         { .reg = 0x00009888, .val = 0x481F3000 },
         { .reg = 0x00009888, .val = 0x4A1F0300 },
         { .reg = 0x00009888, .val = 0x4C1F0003 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F3000 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F3000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00000008 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__typed_bytes_read__read;
      counter->name = "Typed Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__typed_bytes_written__read;
      counter->name = "Typed Bytes Written";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "TypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__untyped_bytes_read__read;
      counter->name = "Untyped Bytes Read";
      counter->desc = "The total number of typed memory bytes read via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesRead";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__untyped_bytes_written__read;
      counter->name = "Untyped Writes";
      counter->desc = "The total number of untyped memory bytes written via Data Port. Unit: bytes.";
      counter->symbol_name = "UntypedBytesWritten";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__typed_atomics__read;
      counter->name = "Typed Atomics Accesses";
      counter->desc = "The total number of typed atomic accesses via Data Port. Unit: events.";
      counter->symbol_name = "TypedAtomics";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 280;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the EHL "ComputeExtended" OA metric set with \p perf.
 *
 * The function:
 *   1. allocates a zero-initialised query descriptor using \p perf as the
 *      ralloc context, plus room for 22 counter descriptors parented to the
 *      query;
 *   2. fills in the query identity (name, symbol name, GUID), the OA report
 *      format and the accumulator offsets;
 *   3. attaches the static MUX and B-counter register programming tables;
 *   4. appends the counter descriptors (several only when bit 0 of
 *      perf->sys_vars.subslice_mask is set);
 *   5. derives query->data_size from the last appended counter;
 *   6. inserts the query into perf->oa_metrics_table keyed by its GUID.
 *
 * This file is generated by gen_perf.py (see the file header); lasting
 * changes belong in the generator or its input, not here.
 *
 * \param perf  performance configuration that receives the query; it is
 *              dereferenced unconditionally and must not be NULL.
 */
static void
ehl_register_compute_extended_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "ComputeExtended metrics set";
   query->symbol_name = "ComputeExtended";
   query->guid = "05700b63-f044-4c79-b8ae-676972114745";
   /* 22 is the number of counter descriptors appended below when every
    * subslice_mask-gated counter is present. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: each section starts where the previous
    * one ends (1 GPU-time slot, 1 GPU-clock slot, 36 A slots, 8 B slots,
    * 8 C slots, 2 perfcnt slots, then rpstat). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): the query above is freshly allocated with rzalloc, so
    * data_size is presumably always 0 here and this branch always runs;
    * the "global variable" wording above looks like a leftover from an
    * older generator design -- confirm before relying on the guard. */
   if (!query->data_size) {
      /* MUX programming as (register, value) pairs, kept in generator order.
       * Register 0x9888 is written many times with different values, so the
       * entries must not be reordered or de-duplicated. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14210160 },
         { .reg = 0x00009888, .val = 0x18210120 },
         { .reg = 0x00009888, .val = 0x12230019 },
         { .reg = 0x00009888, .val = 0x10230012 },
         { .reg = 0x00009888, .val = 0x101E8000 },
         { .reg = 0x00009888, .val = 0x1C1EE000 },
         { .reg = 0x00009888, .val = 0x1E1E001F },
         { .reg = 0x00009888, .val = 0x00204000 },
         { .reg = 0x00009888, .val = 0x0E204000 },
         { .reg = 0x00009888, .val = 0x18204000 },
         { .reg = 0x00009888, .val = 0x1A205550 },
         { .reg = 0x00009888, .val = 0x04208000 },
         { .reg = 0x00009888, .val = 0x06208000 },
         { .reg = 0x00009888, .val = 0x08208000 },
         { .reg = 0x00009888, .val = 0x0A208000 },
         { .reg = 0x00009888, .val = 0x0C208000 },
         { .reg = 0x00009888, .val = 0x00210041 },
         { .reg = 0x00009888, .val = 0x06214200 },
         { .reg = 0x00009888, .val = 0x08214443 },
         { .reg = 0x00009888, .val = 0x0A214645 },
         { .reg = 0x00009888, .val = 0x0C217647 },
         { .reg = 0x00009888, .val = 0x0E210073 },
         { .reg = 0x00009888, .val = 0x1E210000 },
         { .reg = 0x00009888, .val = 0x10210000 },
         { .reg = 0x00009888, .val = 0x02231AB4 },
         { .reg = 0x00009888, .val = 0x04231DBA },
         { .reg = 0x00009888, .val = 0x06230039 },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x0C044400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040055 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050015 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FD00 },
         { .reg = 0x00009888, .val = 0x2606007F },
         { .reg = 0x00009888, .val = 0x0015C000 },
         { .reg = 0x00009888, .val = 0x0E15C000 },
         { .reg = 0x00009888, .val = 0x16157FF8 },
         { .reg = 0x00009888, .val = 0x18150003 },
         { .reg = 0x00009888, .val = 0x0415C000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x00182000 },
         { .reg = 0x00009888, .val = 0x0618A000 },
         { .reg = 0x00009888, .val = 0x0818A000 },
         { .reg = 0x00009888, .val = 0x0A18A000 },
         { .reg = 0x00009888, .val = 0x0C18A000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x0218A000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x08192000 },
         { .reg = 0x00009888, .val = 0x0E19A000 },
         { .reg = 0x00009888, .val = 0x12195540 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x0A19A000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x18130343 },
         { .reg = 0x00009888, .val = 0x22000008 },
         { .reg = 0x00009888, .val = 0x12008000 },
         { .reg = 0x00009888, .val = 0x0E137300 },
         { .reg = 0x00009888, .val = 0x00137B00 },
         { .reg = 0x00009888, .val = 0x10130000 },
         { .reg = 0x00009888, .val = 0x1E130000 },
         { .reg = 0x00009888, .val = 0x1C130000 },
         { .reg = 0x00009888, .val = 0x0E168000 },
         { .reg = 0x00009888, .val = 0x00168000 },
         { .reg = 0x00009888, .val = 0x1A1A0010 },
         { .reg = 0x00009888, .val = 0x021A4000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F000C },
         { .reg = 0x00009888, .val = 0x3A1F2000 },
         { .reg = 0x00009888, .val = 0x4E1F2441 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F1041 },
         { .reg = 0x00009888, .val = 0x421F1010 },
         { .reg = 0x00009888, .val = 0x441F1010 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming as (register, value) pairs, in generator
       * order. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x30000036 },
         { .reg = 0x00002774, .val = 0x01FFFE00 },
         { .reg = 0x00002778, .val = 0x31000034 },
         { .reg = 0x0000277C, .val = 0x01FFFE00 },
         { .reg = 0x00002780, .val = 0x00000C9A },
         { .reg = 0x00002784, .val = 0x0000FE00 },
         { .reg = 0x00002788, .val = 0x00000C92 },
         { .reg = 0x0000278C, .val = 0x0000FE00 },
         { .reg = 0x00002790, .val = 0x00000CA2 },
         { .reg = 0x00002794, .val = 0x0000FE00 },
         { .reg = 0x00002798, .val = 0x00000E42 },
         { .reg = 0x0000279C, .val = 0x0000FE00 },
         { .reg = 0x000027A0, .val = 0x00000E6A },
         { .reg = 0x000027A4, .val = 0x0000FE00 },
         { .reg = 0x000027A8, .val = 0x00000C32 },
         { .reg = 0x000027AC, .val = 0x0000FE00 },
         { .reg = 0x0000274C, .val = 0x87643210 },
         { .reg = 0x00002744, .val = 0x00001811 },
         { .reg = 0x00002748, .val = 0x87654310 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      /* Counter descriptors.  Each one claims the next slot in
       * query->counters and records its read callback, user-visible strings,
       * type/data type/units, raw maximum and result offset.  In this list
       * the offset advances by 8 after a UINT64 counter and by 4 after a
       * FLOAT counter, except where noted. */

      /* --- Always-present GPU-wide counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_extended__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_extended__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_extended__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* Unlike the other counters, the maximum is computed from perf at
       * registration time rather than being a literal. */
      counter->raw_max = ehl__compute_extended__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_extended__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      /* FLOAT counter at 24; the next (UINT64) counter is placed at 32. */
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_extended__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      /* --- Data-port message counters, each appended only when bit 0 of
       * perf->sys_vars.subslice_mask is set.  Offsets are fixed literals, so
       * a skipped counter leaves its result slot unused rather than
       * compacting the layout. --- */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__typed_atomics00__read;
         counter->name = "Typed Atomics 00";
         counter->desc = "Slice 0 Subslice group 0 typed atomics. Unit: messages.";
         counter->symbol_name = "TypedAtomics00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 40;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__typed_reads00__read;
         counter->name = "Typed Reads 00";
         counter->desc = "Slice 0 Subslice group 0 typed reads. Unit: messages.";
         counter->symbol_name = "TypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 48;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__typed_writes00__read;
         counter->name = "Typed Writes 00";
         counter->desc = "Slice 0 Subslice group 0 typed writes. Unit: messages.";
         counter->symbol_name = "TypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 56;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__untyped_reads00__read;
         counter->name = "Untyped Reads 00";
         counter->desc = "Slice 0 Subslice group 0 untyped reads (including SLM reads). Unit: messages.";
         counter->symbol_name = "UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 64;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__untyped_writes00__read;
         counter->name = "Untyped Writes 00";
         counter->desc = "Slice 0 Subslice group 0 untyped writes (including SLM writes). Unit: messages.";
         counter->symbol_name = "UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 72;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_typed_reads00__read;
         counter->name = "Eu Typed Reads 00";
         counter->desc = "Slice0 Subslice group 0 Eu Typed Reads Unit: messages.";
         counter->symbol_name = "EuTypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 80;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_typed_writes00__read;
         counter->name = "Eu Typed Writes 00";
         counter->desc = "Slice0 Subslice group 0 Eu Typed Writes Unit: messages.";
         counter->symbol_name = "EuTypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_typed_atomics00__read;
         counter->name = "Eu Typed Atomics 00";
         counter->desc = "Slice0 Subslice group 0 Eu Typed Atomics Unit: messages.";
         counter->symbol_name = "EuTypedAtomics00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_a32_untyped_reads00__read;
         counter->name = "Eu A32 Untyped Reads 00";
         counter->desc = "Slice0 Subslice group 0 Eu A32 Untyped Reads Unit: messages.";
         counter->symbol_name = "EuA32UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_a32_untyped_writes00__read;
         counter->name = "Eu A32 Untyped Writes 00";
         counter->desc = "Slice0 Subslice group 0 Eu A32 Untyped Writes Unit: messages.";
         counter->symbol_name = "EuA32UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 112;
      }

      /* NOTE(review): this is the only message counter in the group that is
       * registered as FLOAT, and its name/desc say "Eu 64" where the
       * sibling counters say "Eu A32"/"Eu A64".  This may simply mirror the
       * generator input; verify against the metric definition and the
       * signature of the read callback before changing anything. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__compute_extended__eu_a64_untyped_reads00__read;
         counter->name = "Eu 64 Untyped Reads 00";
         counter->desc = "Slice0 Subslice group 0 Eu 64 Untyped Reads Unit: messages.";
         counter->symbol_name = "EuA64UntypedReads00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         /* FLOAT counter at 120; the next (UINT64) counter is placed at 128. */
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_extended__eu_a64_untyped_writes00__read;
         counter->name = "Eu A64 Untyped Writes 00";
         counter->desc = "Slice0 Subslice group 0 Eu A64 Untyped Writes Unit: messages.";
         counter->symbol_name = "EuA64UntypedWrites00";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 128;
      }

      /* --- Per-cache-line ratio counters (FLOAT, consecutive offsets 4
       * apart) --- */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__compute_extended__typed_atomics_per_cache_line__read;
         counter->name = "TypedAtomicsPerCacheLine";
         counter->desc = "The ratio of EU typed atomics requests to L3 cache line writes.";
         counter->symbol_name = "TypedAtomicsPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__compute_extended__typed_reads_per_cache_line__read;
         counter->name = "TypedReadsPerCacheLine";
         counter->desc = "The ratio of EU typed read requests to L3 cache line reads.";
         counter->symbol_name = "TypedReadsPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__compute_extended__typed_writes_per_cache_line__read;
         counter->name = "TypedWritesPerCacheLine";
         counter->desc = "The ratio of EU typed write requests to L3 cache line writes.";
         counter->symbol_name = "TypedWritesPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 144;
      }

      /* NOTE(review): unlike the other four per-cache-line ratios, this one
       * is appended unconditionally (no subslice_mask check).  That may
       * reflect the generator input rather than an omission -- confirm
       * against the metric definition. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_extended__untyped_reads_per_cache_line__read;
      counter->name = "UntypedReadsPerCacheLine";
      counter->desc = "The ratio of EU untyped read requests to L3 cache line reads.";
      counter->symbol_name = "UntypedReadsPerCacheLine";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 148;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__compute_extended__untyped_writes_per_cache_line__read;
         counter->name = "UntypedWritesPerCacheLine";
         counter->desc = "The ratio of EU untyped write requests to L3 cache line writes.";
         counter->symbol_name = "UntypedWritesPerCacheLine";
         counter->category = "L3/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 152;
      }

      /* `counter` points at the last descriptor actually appended (which
       * one depends on subslice_mask), so the result size is that counter's
       * offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_compute_l3_cache_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics L3 Cache set";
   query->symbol_name = "ComputeL3Cache";
   query->guid = "89e1f7ae-1100-4b4f-92f3-0caf88e3d833";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 57);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14120700 },
         { .reg = 0x00009888, .val = 0x121500E0 },
         { .reg = 0x00009888, .val = 0x10040154 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040055 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600F0 },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x0C120023 },
         { .reg = 0x00009888, .val = 0x0E12152B },
         { .reg = 0x00009888, .val = 0x00121480 },
         { .reg = 0x00009888, .val = 0x02120028 },
         { .reg = 0x00009888, .val = 0x10120000 },
         { .reg = 0x00009888, .val = 0x18120000 },
         { .reg = 0x00009888, .val = 0x1A120000 },
         { .reg = 0x00009888, .val = 0x04120000 },
         { .reg = 0x00009888, .val = 0x0A13D000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0C144000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x0C150980 },
         { .reg = 0x00009888, .val = 0x02154D80 },
         { .reg = 0x00009888, .val = 0x04154C9A },
         { .reg = 0x00009888, .val = 0x06150018 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x16150800 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x08150000 },
         { .reg = 0x00009888, .val = 0x0A150000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x0A1473A0 },
         { .reg = 0x00009888, .val = 0x1217241C },
         { .reg = 0x00009888, .val = 0x2000F000 },
         { .reg = 0x00009888, .val = 0x1C134000 },
         { .reg = 0x00009888, .val = 0x1E130015 },
         { .reg = 0x00009888, .val = 0x061434A0 },
         { .reg = 0x00009888, .val = 0x0E140000 },
         { .reg = 0x00009888, .val = 0x0C143000 },
         { .reg = 0x00009888, .val = 0x0816C000 },
         { .reg = 0x00009888, .val = 0x0A16C000 },
         { .reg = 0x00009888, .val = 0x081710B0 },
         { .reg = 0x00009888, .val = 0x00170000 },
         { .reg = 0x00009888, .val = 0x161A2000 },
         { .reg = 0x00009888, .val = 0x181A1500 },
         { .reg = 0x00009888, .val = 0x501F1124 },
         { .reg = 0x00009888, .val = 0x521F4849 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x461F0100 },
         { .reg = 0x00009888, .val = 0x481F1101 },
         { .reg = 0x00009888, .val = 0x4A1F0011 },
         { .reg = 0x00009888, .val = 0x4C1F0030 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F3030 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00000003 },
         { .reg = 0x0000E658, .val = 0x00002001 },
         { .reg = 0x0000E758, .val = 0x00101100 },
         { .reg = 0x0000E45C, .val = 0x00201200 },
         { .reg = 0x0000E55C, .val = 0x00301300 },
         { .reg = 0x0000E65C, .val = 0x00401400 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__compute_l3_cache__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__fpu0_active__read;
      counter->name = "EU FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu0Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__fpu1_active__read;
      counter->name = "EU FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "Fpu1Active";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_hybrid_fpu0_instruction__read;
      counter->name = "EU FPU0 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuHybridFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_hybrid_fpu1_instruction__read;
      counter->name = "EU FPU1 Hybrid Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing hybrid instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuHybridFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_ternary_fpu0_instruction__read;
      counter->name = "EU FPU0 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_ternary_fpu1_instruction__read;
      counter->name = "EU FPU1 Ternary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing ternary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuTernaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_binary_fpu0_instruction__read;
      counter->name = "EU FPU0 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 124;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_binary_fpu1_instruction__read;
      counter->name = "EU FPU1 Binary Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing binary instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuBinaryFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_move_fpu0_instruction__read;
      counter->name = "EU FPU0 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU0. Unit: percent.";
      counter->symbol_name = "EuMoveFpu0Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 132;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__compute_l3_cache__eu_move_fpu1_instruction__read;
      counter->name = "EU FPU1 Move Instruction";
      counter->desc = "The percentage of time in which execution units were actively processing move instructions on FPU1. Unit: percent.";
      counter->symbol_name = "EuMoveFpu1Instruction";
      counter->category = "EU Array/Pipes/Instructions";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__sampler_accesses__read;
      counter->name = "Sampler Accesses";
      counter->desc = "The total number of messages send to samplers. Unit: messages.";
      counter->symbol_name = "SamplerAccesses";
      counter->category = "Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 256;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank00_accesses__read;
         counter->name = "Slice0 L3 Bank0 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank0. Unit: messages.";
         counter->symbol_name = "L3Bank00Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 264;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank01_accesses__read;
         counter->name = "Slice0 L3 Bank1 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank1. Unit: messages.";
         counter->symbol_name = "L3Bank01Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 272;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank02_accesses__read;
         counter->name = "Slice0 L3 Bank2 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank2. Unit: messages.";
         counter->symbol_name = "L3Bank02Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 280;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank03_accesses__read;
         counter->name = "Slice0 L3 Bank3 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank3. Unit: messages.";
         counter->symbol_name = "L3Bank03Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 288;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank04_accesses__read;
         counter->name = "Slice0 L3 Bank4 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank4. Unit: messages.";
         counter->symbol_name = "L3Bank04Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 296;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank05_accesses__read;
         counter->name = "Slice0 L3 Bank5 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank5. Unit: messages.";
         counter->symbol_name = "L3Bank05Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 304;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank06_accesses__read;
         counter->name = "Slice0 L3 Bank6 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank6. Unit: messages.";
         counter->symbol_name = "L3Bank06Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 312;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_bank07_accesses__read;
         counter->name = "Slice0 L3 Bank7 Accesses";
         counter->desc = "The total number of accesses to Slice0 L3 Bank7. Unit: messages.";
         counter->symbol_name = "L3Bank07Accesses";
         counter->category = "L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
         counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
         counter->raw_max = 0 /* undefined */;
         counter->offset = 320;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_accesses__read;
      counter->name = "L3 Accesses";
      counter->desc = "The total number of L3 accesses from all entities. Unit: messages.";
      counter->symbol_name = "L3Accesses";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 328;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_lookups__read;
      counter->name = "L3 Lookup Accesses w/o IC";
      counter->desc = "The total number of L3 cache lookup accesses w/o IC. Unit: messages.";
      counter->symbol_name = "L3Lookups";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 336;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_misses__read;
      counter->name = "L3 Misses";
      counter->desc = "The total number of L3 misses. Unit: messages.";
      counter->symbol_name = "L3Misses";
      counter->category = "L3/TAG";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 344;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_sampler_throughput__read;
      counter->name = "L3 Sampler Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between samplers and L3 caches. Unit: bytes.";
      counter->symbol_name = "L3SamplerThroughput";
      counter->category = "L3/Sampler";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 352;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 360;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__l3_total_throughput__read;
      counter->name = "L3 Total Throughput";
      counter->desc = "The total number of GPU memory bytes transferred via L3. Unit: bytes.";
      counter->symbol_name = "L3TotalThroughput";
      counter->category = "L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 368;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gti_l3_throughput__read;
      counter->name = "GTI L3 Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between L3 caches and GTI. Unit: bytes.";
      counter->symbol_name = "GtiL3Throughput";
      counter->category = "GTI/L3";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 376;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 384;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__compute_l3_cache__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 392;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "7bfa0a68-c327-46e8-a556-cfa7b4651071";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x101E001E },
         { .reg = 0x00009888, .val = 0x10160000 },
         { .reg = 0x00009888, .val = 0x1017001F },
         { .reg = 0x00009888, .val = 0x0A1E0500 },
         { .reg = 0x00009888, .val = 0x1E1E0000 },
         { .reg = 0x00009888, .val = 0x1A200100 },
         { .reg = 0x00009888, .val = 0x10040015 },
         { .reg = 0x00009888, .val = 0x0E054000 },
         { .reg = 0x00009888, .val = 0x14050005 },
         { .reg = 0x00009888, .val = 0x26060038 },
         { .reg = 0x00009888, .val = 0x16157E00 },
         { .reg = 0x00009888, .val = 0x0C160022 },
         { .reg = 0x00009888, .val = 0x08160000 },
         { .reg = 0x00009888, .val = 0x0C170540 },
         { .reg = 0x00009888, .val = 0x04170000 },
         { .reg = 0x00009888, .val = 0x0A188000 },
         { .reg = 0x00009888, .val = 0x0C181000 },
         { .reg = 0x00009888, .val = 0x12195000 },
         { .reg = 0x00009888, .val = 0x14190001 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x100A0019 },
         { .reg = 0x00009888, .val = 0x140A001F },
         { .reg = 0x00009888, .val = 0x180B0000 },
         { .reg = 0x00009888, .val = 0x160C0000 },
         { .reg = 0x00009888, .val = 0x1C020000 },
         { .reg = 0x00009888, .val = 0x1806801F },
         { .reg = 0x00009888, .val = 0x1007001E },
         { .reg = 0x00009888, .val = 0x12080000 },
         { .reg = 0x00009888, .val = 0x0A0A0047 },
         { .reg = 0x00009888, .val = 0x020A0002 },
         { .reg = 0x00009888, .val = 0x040A0043 },
         { .reg = 0x00009888, .val = 0x200A0000 },
         { .reg = 0x00009888, .val = 0x1E0A0000 },
         { .reg = 0x00009888, .val = 0x180A0000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009888, .val = 0x080B3100 },
         { .reg = 0x00009888, .val = 0x020B3200 },
         { .reg = 0x00009888, .val = 0x100B0000 },
         { .reg = 0x00009888, .val = 0x040C2900 },
         { .reg = 0x00009888, .val = 0x000C0000 },
         { .reg = 0x00009888, .val = 0x100200B1 },
         { .reg = 0x00009888, .val = 0x020200B3 },
         { .reg = 0x00009888, .val = 0x08020000 },
         { .reg = 0x00009888, .val = 0x00020000 },
         { .reg = 0x00009888, .val = 0x00034000 },
         { .reg = 0x00009888, .val = 0x08038000 },
         { .reg = 0x00009888, .val = 0x0E034000 },
         { .reg = 0x00009888, .val = 0x02038000 },
         { .reg = 0x00009888, .val = 0x1C04FFB6 },
         { .reg = 0x00009888, .val = 0x1E04E039 },
         { .reg = 0x00009888, .val = 0x0C059000 },
         { .reg = 0x00009888, .val = 0x10050042 },
         { .reg = 0x00009888, .val = 0x06060095 },
         { .reg = 0x00009888, .val = 0x1E060084 },
         { .reg = 0x00009888, .val = 0x1C068000 },
         { .reg = 0x00009888, .val = 0x14068000 },
         { .reg = 0x00009888, .val = 0x1A068000 },
         { .reg = 0x00009888, .val = 0x00070022 },
         { .reg = 0x00009888, .val = 0x0E070023 },
         { .reg = 0x00009888, .val = 0x08078000 },
         { .reg = 0x00009888, .val = 0x02078000 },
         { .reg = 0x00009888, .val = 0x06080100 },
         { .reg = 0x00009888, .val = 0x0E080300 },
         { .reg = 0x00009888, .val = 0x1C080004 },
         { .reg = 0x00009888, .val = 0x1A082000 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x18088000 },
         { .reg = 0x00009888, .val = 0x1C090800 },
         { .reg = 0x00009888, .val = 0x16098000 },
         { .reg = 0x00009888, .val = 0x301F6000 },
         { .reg = 0x00009888, .val = 0x501F36DB },
         { .reg = 0x00009888, .val = 0x521F491B },
         { .reg = 0x00009888, .val = 0x541F001B },
         { .reg = 0x00009888, .val = 0x3A1F6000 },
         { .reg = 0x00009888, .val = 0x4E1F1B41 },
         { .reg = 0x00009888, .val = 0x3E1F3100 },
         { .reg = 0x00009888, .val = 0x461F5141 },
         { .reg = 0x00009888, .val = 0x481F3111 },
         { .reg = 0x00009888, .val = 0x4A1F1000 },
         { .reg = 0x00009888, .val = 0x4C1F3120 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F3151 },
         { .reg = 0x00009888, .val = 0x421F3111 },
         { .reg = 0x00009888, .val = 0x441F5121 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 212;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__early_depth_bottleneck__read;
      counter->name = "Early Depth Bottleneck";
      counter->desc = "The percentage of time in which early depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "EarlyDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Helper for ehl_register_hdc_and_sf_counter_query() only.
 *
 * Claims the next free slot in info->counters (bumping info->n_counters),
 * leaves the function-local `ctr` pointing at it and fills in the counter
 * description.  `read_member` names the reader field of the counter that
 * receives `read_fn` (oa_counter_read_uint64 or oa_counter_read_float).
 * Expects locals named `info` and `ctr` to be in scope at the call site.
 */
#define EHL_HDC_SF_ADD(read_member, read_fn, data_type_, name_, desc_,        \
                       symbol_, category_, type_, units_, max_, offset_)      \
   do {                                                                       \
      ctr = &info->counters[info->n_counters++];                              \
      ctr->read_member = (read_fn);                                           \
      ctr->name = (name_);                                                    \
      ctr->desc = (desc_);                                                    \
      ctr->symbol_name = (symbol_);                                           \
      ctr->category = (category_);                                            \
      ctr->type = (type_);                                                    \
      ctr->data_type = (data_type_);                                          \
      ctr->units = (units_);                                                  \
      ctr->raw_max = (max_);                                                  \
      ctr->offset = (offset_);                                                \
   } while (0)

/* 64-bit unsigned counter, read through oa_counter_read_uint64. */
#define EHL_HDC_SF_U64(read_fn, name_, desc_, symbol_, category_, type_,      \
                       units_, max_, offset_)                                 \
   EHL_HDC_SF_ADD(oa_counter_read_uint64, read_fn,                            \
                  INTEL_PERF_COUNTER_DATA_TYPE_UINT64, name_, desc_,          \
                  symbol_, category_, type_, units_, max_, offset_)

/* Float percentage counter, read through oa_counter_read_float: always a
 * RAW counter in PERCENT units with a raw_max of 100.0. */
#define EHL_HDC_SF_PERCENT(read_fn, name_, desc_, symbol_, category_,         \
                           offset_)                                           \
   EHL_HDC_SF_ADD(oa_counter_read_float, read_fn,                             \
                  INTEL_PERF_COUNTER_DATA_TYPE_FLOAT, name_, desc_,           \
                  symbol_, category_, INTEL_PERF_COUNTER_TYPE_RAW,            \
                  INTEL_PERF_COUNTER_UNITS_PERCENT, 100.0, offset_)

/*
 * Registers the "HDCAndSF" OA metric set.
 *
 * Allocates a query description with rzalloc(perf, ...), fills in the OA
 * report format, the accumulator offsets, the mux / B-counter / flex
 * register programming and the counter list (41 slots are reserved; four of
 * the counters are only added when the matching bits of
 * perf->sys_vars.subslice_mask are set), then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 */
static void
ehl_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Metric set HDCAndSF";
   info->symbol_name = "HDCAndSF";
   info->guid = "697eadd2-bc54-459f-b634-fcb3d4a2d627";

   /* Counter storage: room for all 41 counters, none populated yet. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 41);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends (1 gpu time, 1 gpu clock, 36 A, 8 B, 8 C, 2 perfcnt). */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* Only describe the query while its data_size is still unset.
    * NOTE(review): info was just obtained from rzalloc(), so this is
    * presumably always the case here -- confirm against ralloc. */
   if (!info->data_size) {
      /* Mux register programming. */
      static const struct intel_perf_query_register_prog mux_prog[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1223000A },
         { .reg = 0x00009888, .val = 0x10230009 },
         { .reg = 0x00009888, .val = 0x1263000A },
         { .reg = 0x00009888, .val = 0x10630009 },
         { .reg = 0x00009888, .val = 0x12A3000A },
         { .reg = 0x00009888, .val = 0x10A30009 },
         { .reg = 0x00009888, .val = 0x12E3000A },
         { .reg = 0x00009888, .val = 0x10E30009 },
         { .reg = 0x00009888, .val = 0x10150019 },
         { .reg = 0x00009888, .val = 0x1A20A000 },
         { .reg = 0x00009888, .val = 0x0C231900 },
         { .reg = 0x00009888, .val = 0x0E23003C },
         { .reg = 0x00009888, .val = 0x00230000 },
         { .reg = 0x00009888, .val = 0x1C600002 },
         { .reg = 0x00009888, .val = 0x02608000 },
         { .reg = 0x00009888, .val = 0x0E631900 },
         { .reg = 0x00009888, .val = 0x00631E00 },
         { .reg = 0x00009888, .val = 0x04A08000 },
         { .reg = 0x00009888, .val = 0x06A08000 },
         { .reg = 0x00009888, .val = 0x02A31E32 },
         { .reg = 0x00009888, .val = 0x00A30000 },
         { .reg = 0x00009888, .val = 0x08E08000 },
         { .reg = 0x00009888, .val = 0x0AE08000 },
         { .reg = 0x00009888, .val = 0x04E31E32 },
         { .reg = 0x00009888, .val = 0x00E30000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x10040150 },
         { .reg = 0x00009888, .val = 0x0E040015 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x14050054 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x24063F00 },
         { .reg = 0x00009888, .val = 0x260600E0 },
         { .reg = 0x00009888, .val = 0x0E088000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x14090040 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x1A120020 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x0E148000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x00150047 },
         { .reg = 0x00009888, .val = 0x16156000 },
         { .reg = 0x00009888, .val = 0x18150007 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C188000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x021CA000 },
         { .reg = 0x00009888, .val = 0x0A1DA000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3061 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x521F4000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x501F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0100 },
         { .reg = 0x00009888, .val = 0x4C1F3030 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F3330 },
      };
      info->config.n_mux_regs = ARRAY_SIZE(mux_prog);
      info->config.mux_regs = mux_prog;

      /* B-counter register programming. */
      static const struct intel_perf_query_register_prog b_counter_prog[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000002 },
         { .reg = 0x00002774, .val = 0x0000FFFE },
      };
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);
      info->config.b_counter_regs = b_counter_prog;

      /* Flex register programming. */
      static const struct intel_perf_query_register_prog flex_prog[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      info->config.n_flex_regs = ARRAY_SIZE(flex_prog);
      info->config.flex_regs = flex_prog;

      /* --- General GPU counters --- */
      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__gpu_time__read,
         "GPU Time Elapsed",
         "Time elapsed on the GPU during the measurement. Unit: ns.",
         "GpuTime", "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW, INTEL_PERF_COUNTER_UNITS_NS,
         0 /* undefined */, 0);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__gpu_core_clocks__read,
         "GPU Core Clocks",
         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         "GpuCoreClocks", "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_CYCLES,
         0 /* undefined */, 8);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__avg_gpu_core_frequency__read,
         "AVG GPU Core Frequency",
         "Average GPU Core Frequency in the measurement. Unit: Hz.",
         "AvgGpuCoreFrequency", "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_HZ,
         ehl__hdc_and_sf__avg_gpu_core_frequency__max(perf), 16);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__gpu_busy__read,
         "GPU Busy",
         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
         "GpuBusy", "GPU", 24);

      /* --- Threads dispatched per shader stage --- */
      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__vs_threads__read,
         "VS Threads Dispatched",
         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         "VsThreads", "EU Array/Vertex Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 32);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__hs_threads__read,
         "HS Threads Dispatched",
         "The total number of hull shader hardware threads dispatched. Unit: threads.",
         "HsThreads", "EU Array/Hull Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 40);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__ds_threads__read,
         "DS Threads Dispatched",
         "The total number of domain shader hardware threads dispatched. Unit: threads.",
         "DsThreads", "EU Array/Domain Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 48);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__gs_threads__read,
         "GS Threads Dispatched",
         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         "GsThreads", "EU Array/Geometry Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 56);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__ps_threads__read,
         "FS Threads Dispatched",
         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         "PsThreads", "EU Array/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 64);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__cs_threads__read,
         "CS Threads Dispatched",
         "The total number of compute shader hardware threads dispatched. Unit: threads.",
         "CsThreads", "EU Array/Compute Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */, 72);

      /* --- EU array activity percentages --- */
      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__eu_active__read,
         "EU Active",
         "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
         "EuActive", "EU Array", 80);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__eu_stall__read,
         "EU Stall",
         "The percentage of time in which the Execution Units were stalled. Unit: percent.",
         "EuStall", "EU Array", 84);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__eu_fpu_both_active__read,
         "EU Both FPU Pipes Active",
         "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.",
         "EuFpuBothActive", "EU Array/Pipes", 88);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__vs_fpu0_active__read,
         "VS FPU0 Pipe Active",
         "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsFpu0Active", "EU Array/Vertex Shader", 92);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__vs_fpu1_active__read,
         "VS FPU1 Pipe Active",
         "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsFpu1Active", "EU Array/Vertex Shader", 96);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__vs_send_active__read,
         "VS Send Pipe Active",
         "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.",
         "VsSendActive", "EU Array/Vertex Shader", 100);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__ps_fpu0_active__read,
         "PS FPU0 Pipe Active",
         "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsFpu0Active", "EU Array/Pixel Shader", 104);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__ps_fpu1_active__read,
         "PS FPU1 Pipe Active",
         "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsFpu1Active", "EU Array/Pixel Shader", 108);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__ps_send_active__read,
         "PS Send Pipeline Active",
         "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.",
         "PsSendActive", "EU Array/Pixel Shader", 112);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__ps_eu_both_fpu_active__read,
         "FS Both FPU Active",
         "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.",
         "PsEuBothFpuActive", "3D Pipe/Fragment Shader", 116);

      /* --- Pixel / sample event counters --- */
      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__rasterized_pixels__read,
         "Rasterized Pixels",
         "The total number of rasterized pixels. Unit: pixels.",
         "RasterizedPixels", "3D Pipe/Rasterizer",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 120);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__hi_depth_test_fails__read,
         "Early Hi-Depth Test Fails",
         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         "HiDepthTestFails", "3D Pipe/Rasterizer/Hi-Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 128);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__early_depth_test_fails__read,
         "Early Depth Test Fails",
         "The total number of pixels dropped on early depth test. Unit: pixels.",
         "EarlyDepthTestFails", "3D Pipe/Rasterizer/Early Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 136);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__samples_killed_in_ps__read,
         "Samples Killed in FS",
         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         "SamplesKilledInPs", "3D Pipe/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 144);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__pixels_failing_post_ps_tests__read,
         "Pixels Failing Tests",
         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         "PixelsFailingPostPsTests", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 152);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__samples_written__read,
         "Samples Written",
         "The total number of samples or pixels written to all render targets. Unit: pixels.",
         "SamplesWritten", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 160);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__samples_blended__read,
         "Samples Blended",
         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         "SamplesBlended", "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */, 168);

      /* --- Sampler counters --- */
      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__sampler_texels__read,
         "Sampler Texels",
         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         "SamplerTexels", "Sampler/Sampler Input",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */, 176);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__sampler_texel_misses__read,
         "Sampler Texels Misses",
         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         "SamplerTexelMisses", "Sampler/Sampler Cache",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */, 184);

      /* --- L3 / data port counters --- */
      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__slm_bytes_read__read,
         "SLM Bytes Read",
         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         "SlmBytesRead", "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 192);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__slm_bytes_written__read,
         "SLM Bytes Written",
         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         "SlmBytesWritten", "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 200);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__shader_memory_accesses__read,
         "Shader Memory Accesses",
         "The total number of shader memory accesses to L3. Unit: messages.",
         "ShaderMemoryAccesses", "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 208);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__shader_atomics__read,
         "Shader Atomic Memory Accesses",
         "The total number of shader atomic memory accesses. Unit: messages.",
         "ShaderAtomics", "L3/Data Port/Atomics",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 216);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__l3_shader_throughput__read,
         "L3 Shader Throughput",
         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         "L3ShaderThroughput", "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT, INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */, 224);

      EHL_HDC_SF_U64(
         ehl__hdc_and_sf__shader_barriers__read,
         "Shader Barrier Messages",
         "The total number of shader barrier messages. Unit: messages.",
         "ShaderBarriers", "EU Array/Barrier",
         INTEL_PERF_COUNTER_TYPE_EVENT, INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */, 232);

      /* --- Per subslice group HDC stall counters ---
       * Each one is added only if at least one bit of its two-bit slice of
       * subslice_mask is set; its offset is fixed either way. */
      if (perf->sys_vars.subslice_mask & 0x03) {
         EHL_HDC_SF_PERCENT(
            ehl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read,
            "Slice0 Subslice group 0 Non-sampler Shader Access Stalled On L3",
            "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Subslice group 0) Unit: percent.",
            "NonSamplerShader00AccessStalledOnL3", "GPU/Data Port", 240);
      }

      if (perf->sys_vars.subslice_mask & 0x0C) {
         EHL_HDC_SF_PERCENT(
            ehl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read,
            "Slice0 Subslice group 1 Non-sampler Shader Access Stalled On L3",
            "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Subslice group 1) Unit: percent.",
            "NonSamplerShader01AccessStalledOnL3", "GPU/Data Port", 244);
      }

      if (perf->sys_vars.subslice_mask & 0x30) {
         EHL_HDC_SF_PERCENT(
            ehl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read,
            "Slice0 Subslice group 2 Non-sampler Shader Access Stalled On L3",
            "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Subslice group 2) Unit: percent.",
            "NonSamplerShader02AccessStalledOnL3", "GPU/Data Port", 248);
      }

      if (perf->sys_vars.subslice_mask & 0xC0) {
         EHL_HDC_SF_PERCENT(
            ehl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read,
            "Slice0 Subslice group 3 Non-sampler Shader Access Stalled On L3",
            "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Subslice group 3) Unit: percent.",
            "NonSamplerShader03AccessStalledOnL3", "GPU/Data Port", 252);
      }

      /* --- Strip-fans and GTI counters --- */
      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__poly_data_ready__read,
         "Polygon Data Ready",
         "The percentage of time in which geometry pipeline output is ready Unit: percent.",
         "PolyDataReady", "GPU/3D Pipe/Strip-Fans", 256);

      EHL_HDC_SF_PERCENT(
         ehl__hdc_and_sf__gt_request_queue_full__read,
         "SQ is full",
         "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.",
         "GTRequestQueueFull", "GTI", 260);

      /* ctr is the last counter added above: the result buffer ends right
       * after it. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

#undef EHL_HDC_SF_PERCENT
#undef EHL_HDC_SF_U64
#undef EHL_HDC_SF_ADD


/* Helpers local to ehl_register_rasterizer_and_pixel_backend_counter_query().
 *
 * Both expect `query` and `counter` to be in scope.  Each one claims the next
 * unused entry of query->counters (bumping query->n_counters) and fills in
 * its description, assigning the fields in the order written below.
 */

/* Describe a counter that is read back as a 64-bit unsigned integer. */
#define EHL_RPB_ADD_UINT64(read_fn_, name_, desc_, symbol_, category_,        \
                           type_, units_, raw_max_, offset_)                  \
   do {                                                                       \
      counter = &query->counters[query->n_counters++];                        \
      counter->oa_counter_read_uint64 = (read_fn_);                           \
      counter->name = (name_);                                                \
      counter->desc = (desc_);                                                \
      counter->symbol_name = (symbol_);                                       \
      counter->category = (category_);                                        \
      counter->type = (type_);                                                \
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;               \
      counter->units = (units_);                                              \
      counter->raw_max = (raw_max_);                                          \
      counter->offset = (offset_);                                            \
   } while (0)

/* Describe a counter that is read back as a float percentage: every float
 * counter of this metric set is RAW / PERCENT with a raw_max of 100.0. */
#define EHL_RPB_ADD_PERCENT(read_fn_, name_, desc_, symbol_, category_,       \
                            offset_)                                          \
   do {                                                                       \
      counter = &query->counters[query->n_counters++];                        \
      counter->oa_counter_read_float = (read_fn_);                            \
      counter->name = (name_);                                                \
      counter->desc = (desc_);                                                \
      counter->symbol_name = (symbol_);                                       \
      counter->category = (category_);                                        \
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;                            \
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;                \
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;                      \
      counter->raw_max = 100.0;                                               \
      counter->offset = (offset_);                                            \
   } while (0)

/* Build the "RasterizerAndPixelBackend" OA query description and add it to
 * perf->oa_metrics_table under its GUID.
 *
 * The query is allocated with `perf` as its ralloc parent and the counter
 * array with the query as its parent.  The function fills in the accumulator
 * layout, the mux / B-counter / flex register programming and up to 44
 * counter descriptions; two of them are only registered when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
ehl_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "5cda6ba5-f3b7-43a4-8027-8d0958a263c4";

   /* Room for every counter this function can register (44 at most). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Only populate the description while data_size is still unset.
    * NOTE(review): query was just allocated with rzalloc(), so this is
    * presumably always taken -- confirm before relying on it. */
   if (!query->data_size) {
      struct intel_perf_query_counter *counter;

      /* ---- Register programming ---- */

      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x120A03E0 },
         { .reg = 0x00009888, .val = 0x140C0500 },
         { .reg = 0x00009888, .val = 0x120F03E0 },
         { .reg = 0x00009888, .val = 0x14110500 },
         { .reg = 0x00009888, .val = 0x10136000 },
         { .reg = 0x00009888, .val = 0x1213001C },
         { .reg = 0x00009888, .val = 0x16140000 },
         { .reg = 0x00009888, .val = 0x10160018 },
         { .reg = 0x00009888, .val = 0x101A0018 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045545 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E051000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08051000 },
         { .reg = 0x00009888, .val = 0x2406DF00 },
         { .reg = 0x00009888, .val = 0x26060007 },
         { .reg = 0x00009888, .val = 0x02084000 },
         { .reg = 0x00009888, .val = 0x04098000 },
         { .reg = 0x00009888, .val = 0x06094000 },
         { .reg = 0x00009888, .val = 0x000A0C40 },
         { .reg = 0x00009888, .val = 0x0C0A0000 },
         { .reg = 0x00009888, .val = 0x040A0000 },
         { .reg = 0x00009888, .val = 0x020C0B80 },
         { .reg = 0x00009888, .val = 0x080C0000 },
         { .reg = 0x00009888, .val = 0x040D1000 },
         { .reg = 0x00009888, .val = 0x060E2000 },
         { .reg = 0x00009888, .val = 0x080E1000 },
         { .reg = 0x00009888, .val = 0x020F0031 },
         { .reg = 0x00009888, .val = 0x0C0F0000 },
         { .reg = 0x00009888, .val = 0x040F0000 },
         { .reg = 0x00009888, .val = 0x04110017 },
         { .reg = 0x00009888, .val = 0x08110000 },
         { .reg = 0x00009888, .val = 0x18120C00 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x06128000 },
         { .reg = 0x00009888, .val = 0x0613092F },
         { .reg = 0x00009888, .val = 0x08134025 },
         { .reg = 0x00009888, .val = 0x0C130000 },
         { .reg = 0x00009888, .val = 0x0413A000 },
         { .reg = 0x00009888, .val = 0x00148066 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x0614C000 },
         { .reg = 0x00009888, .val = 0x08144000 },
         { .reg = 0x00009888, .val = 0x0A144000 },
         { .reg = 0x00009888, .val = 0x02148000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x161500E8 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04158000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0C154000 },
         { .reg = 0x00009888, .val = 0x08161000 },
         { .reg = 0x00009888, .val = 0x08184000 },
         { .reg = 0x00009888, .val = 0x12190100 },
         { .reg = 0x00009888, .val = 0x0A1A0020 },
         { .reg = 0x00009888, .val = 0x081A0000 },
         { .reg = 0x00009888, .val = 0x0A1C1000 },
         { .reg = 0x00009888, .val = 0x121D0400 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3000 },
         { .reg = 0x00009888, .val = 0x1A1F0061 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4904 },
         { .reg = 0x00009888, .val = 0x521F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F5000 },
         { .reg = 0x00009888, .val = 0x461F4040 },
         { .reg = 0x00009888, .val = 0x481F0030 },
         { .reg = 0x00009888, .val = 0x401F4010 },
         { .reg = 0x00009888, .val = 0x421F3000 },
         { .reg = 0x00009888, .val = 0x441F4023 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x70800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000038 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000002 },
         { .reg = 0x0000277C, .val = 0x0000FFF7 },
         { .reg = 0x00002780, .val = 0x00000002 },
         { .reg = 0x00002784, .val = 0x0000FFEF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* ---- Counter descriptions, in result-buffer order ---- */

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__gpu_time__read,
                         "GPU Time Elapsed",
                         "Time elapsed on the GPU during the measurement. Unit: ns.",
                         "GpuTime",
                         "GPU",
                         INTEL_PERF_COUNTER_TYPE_RAW,
                         INTEL_PERF_COUNTER_UNITS_NS,
                         0 /* undefined */,
                         0);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__gpu_core_clocks__read,
                         "GPU Core Clocks",
                         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
                         "GpuCoreClocks",
                         "GPU",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_CYCLES,
                         0 /* undefined */,
                         8);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read,
                         "AVG GPU Core Frequency",
                         "Average GPU Core Frequency in the measurement. Unit: Hz.",
                         "AvgGpuCoreFrequency",
                         "GPU",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_HZ,
                         ehl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf),
                         16);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__gpu_busy__read,
                          "GPU Busy",
                          "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
                          "GpuBusy",
                          "GPU",
                          24);

      /* Per-stage thread dispatch counts. */
      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__vs_threads__read,
                         "VS Threads Dispatched",
                         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
                         "VsThreads",
                         "EU Array/Vertex Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         32);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__hs_threads__read,
                         "HS Threads Dispatched",
                         "The total number of hull shader hardware threads dispatched. Unit: threads.",
                         "HsThreads",
                         "EU Array/Hull Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         40);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__ds_threads__read,
                         "DS Threads Dispatched",
                         "The total number of domain shader hardware threads dispatched. Unit: threads.",
                         "DsThreads",
                         "EU Array/Domain Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         48);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__gs_threads__read,
                         "GS Threads Dispatched",
                         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
                         "GsThreads",
                         "EU Array/Geometry Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         56);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__ps_threads__read,
                         "FS Threads Dispatched",
                         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
                         "PsThreads",
                         "EU Array/Fragment Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         64);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__cs_threads__read,
                         "CS Threads Dispatched",
                         "The total number of compute shader hardware threads dispatched. Unit: threads.",
                         "CsThreads",
                         "EU Array/Compute Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_THREADS,
                         0 /* undefined */,
                         72);

      /* EU utilisation percentages. */
      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__eu_active__read,
                          "EU Active",
                          "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
                          "EuActive",
                          "EU Array",
                          80);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__eu_stall__read,
                          "EU Stall",
                          "The percentage of time in which the Execution Units were stalled. Unit: percent.",
                          "EuStall",
                          "EU Array",
                          84);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__eu_fpu_both_active__read,
                          "EU Both FPU Pipes Active",
                          "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.",
                          "EuFpuBothActive",
                          "EU Array/Pipes",
                          88);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__vs_fpu0_active__read,
                          "VS FPU0 Pipe Active",
                          "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                          "VsFpu0Active",
                          "EU Array/Vertex Shader",
                          92);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__vs_fpu1_active__read,
                          "VS FPU1 Pipe Active",
                          "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                          "VsFpu1Active",
                          "EU Array/Vertex Shader",
                          96);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__vs_send_active__read,
                          "VS Send Pipe Active",
                          "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.",
                          "VsSendActive",
                          "EU Array/Vertex Shader",
                          100);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_fpu0_active__read,
                          "PS FPU0 Pipe Active",
                          "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                          "PsFpu0Active",
                          "EU Array/Pixel Shader",
                          104);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_fpu1_active__read,
                          "PS FPU1 Pipe Active",
                          "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                          "PsFpu1Active",
                          "EU Array/Pixel Shader",
                          108);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_send_active__read,
                          "PS Send Pipeline Active",
                          "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.",
                          "PsSendActive",
                          "EU Array/Pixel Shader",
                          112);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_eu_both_fpu_active__read,
                          "FS Both FPU Active",
                          "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.",
                          "PsEuBothFpuActive",
                          "3D Pipe/Fragment Shader",
                          116);

      /* Pixel / sample pipeline event counts. */
      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__rasterized_pixels__read,
                         "Rasterized Pixels",
                         "The total number of rasterized pixels. Unit: pixels.",
                         "RasterizedPixels",
                         "3D Pipe/Rasterizer",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         120);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__hi_depth_test_fails__read,
                         "Early Hi-Depth Test Fails",
                         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
                         "HiDepthTestFails",
                         "3D Pipe/Rasterizer/Hi-Depth Test",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         128);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__early_depth_test_fails__read,
                         "Early Depth Test Fails",
                         "The total number of pixels dropped on early depth test. Unit: pixels.",
                         "EarlyDepthTestFails",
                         "3D Pipe/Rasterizer/Early Depth Test",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         136);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__samples_killed_in_ps__read,
                         "Samples Killed in FS",
                         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
                         "SamplesKilledInPs",
                         "3D Pipe/Fragment Shader",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         144);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read,
                         "Pixels Failing Tests",
                         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
                         "PixelsFailingPostPsTests",
                         "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         152);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__samples_written__read,
                         "Samples Written",
                         "The total number of samples or pixels written to all render targets. Unit: pixels.",
                         "SamplesWritten",
                         "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         160);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__samples_blended__read,
                         "Samples Blended",
                         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
                         "SamplesBlended",
                         "3D Pipe/Output Merger",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_PIXELS,
                         0 /* undefined */,
                         168);

      /* Sampler, SLM and L3 traffic. */
      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__sampler_texels__read,
                         "Sampler Texels",
                         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
                         "SamplerTexels",
                         "Sampler/Sampler Input",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_TEXELS,
                         0 /* undefined */,
                         176);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__sampler_texel_misses__read,
                         "Sampler Texels Misses",
                         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
                         "SamplerTexelMisses",
                         "Sampler/Sampler Cache",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_TEXELS,
                         0 /* undefined */,
                         184);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__slm_bytes_read__read,
                         "SLM Bytes Read",
                         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
                         "SlmBytesRead",
                         "L3/Data Port/SLM",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                         INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */,
                         192);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__slm_bytes_written__read,
                         "SLM Bytes Written",
                         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
                         "SlmBytesWritten",
                         "L3/Data Port/SLM",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                         INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */,
                         200);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__shader_memory_accesses__read,
                         "Shader Memory Accesses",
                         "The total number of shader memory accesses to L3. Unit: messages.",
                         "ShaderMemoryAccesses",
                         "L3/Data Port",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */,
                         208);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__shader_atomics__read,
                         "Shader Atomic Memory Accesses",
                         "The total number of shader atomic memory accesses. Unit: messages.",
                         "ShaderAtomics",
                         "L3/Data Port/Atomics",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */,
                         216);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__l3_shader_throughput__read,
                         "L3 Shader Throughput",
                         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
                         "L3ShaderThroughput",
                         "L3/Data Port",
                         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                         INTEL_PERF_COUNTER_UNITS_BYTES,
                         0 /* unsupported (varies over time) */,
                         224);

      EHL_RPB_ADD_UINT64(ehl__rasterizer_and_pixel_backend__shader_barriers__read,
                         "Shader Barrier Messages",
                         "The total number of shader barrier messages. Unit: messages.",
                         "ShaderBarriers",
                         "EU Array/Barrier",
                         INTEL_PERF_COUNTER_TYPE_EVENT,
                         INTEL_PERF_COUNTER_UNITS_MESSAGES,
                         0 /* undefined */,
                         232);

      /* The two slice0 rasterizer counters are registered only when bit 0 of
       * the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__rasterizer0_input_available__read,
                             "Slice0 Rasterizer Input Available",
                             "The percentage of time in which slice0 rasterizer input is available Unit: percent.",
                             "Rasterizer0InputAvailable",
                             "GPU/Rasterizer",
                             240);

         EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read,
                             "Slice0 Rasterizer Output Ready",
                             "The percentage of time in which slice0 rasterizer output is ready Unit: percent.",
                             "Rasterizer0OutputReady",
                             "GPU/Rasterizer",
                             244);
      }

      /* Slice0 per-pipe pixel backend percentages. */
      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__pixel_data00_ready__read,
                          "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready",
                          "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.",
                          "PixelData00Ready",
                          "GPU/Rasterizer/Early Depth Test",
                          248);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__pixel_data01_ready__read,
                          "Slice0 Pipe1 Post-EarlyZ Pixel Data Ready",
                          "The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.",
                          "PixelData01Ready",
                          "GPU/Rasterizer/Early Depth Test",
                          252);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_output00_available__read,
                          "Slice0 Pipe0 PS Output Available",
                          "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.",
                          "PSOutput00Available",
                          "GPU/3D Pipe",
                          256);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__ps_output01_available__read,
                          "Slice0 Pipe1 PS Output Available",
                          "The percentage of time in which slice0 pipe1 PS output is available Unit: percent.",
                          "PSOutput01Available",
                          "GPU/3D Pipe",
                          260);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__pixel_values00_ready__read,
                          "Slice0 Pipe0 Pixel Values Ready",
                          "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.",
                          "PixelValues00Ready",
                          "GPU/3D Pipe",
                          264);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__pixel_values01_ready__read,
                          "Slice0 Pipe1 Pixel Values Ready",
                          "The percentage of time in which slice0 pipe1 pixel values are ready Unit: percent.",
                          "PixelValues01Ready",
                          "GPU/3D Pipe",
                          268);

      EHL_RPB_ADD_PERCENT(ehl__rasterizer_and_pixel_backend__gt_request_queue_full__read,
                          "SQ is full",
                          "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.",
                          "GTRequestQueueFull",
                          "GTI",
                          272);

      /* The result size is the end of the last counter registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef EHL_RPB_ADD_PERCENT
#undef EHL_RPB_ADD_UINT64


/* Appends one counter description to the query under construction.
 *
 * Expands in place inside ehl_register_l3_1_counter_query() and relies on
 * that function's locals `query` and `slot`: the next unused element of
 * query->counters is claimed (incrementing query->n_counters) and all of
 * its descriptive fields are filled in.  `read_member_` names the reader
 * callback member that receives `read_fn_`.
 */
#define EHL_L3_1_COUNTER(read_member_, read_fn_, name_, desc_, symbol_,       \
                         category_, type_, data_type_, units_, raw_max_,      \
                         offset_)                                             \
   do {                                                                       \
      slot = &query->counters[query->n_counters++];                           \
      slot->read_member_ = (read_fn_);                                        \
      slot->name = (name_);                                                   \
      slot->desc = (desc_);                                                   \
      slot->symbol_name = (symbol_);                                          \
      slot->category = (category_);                                           \
      slot->type = (type_);                                                   \
      slot->data_type = (data_type_);                                         \
      slot->units = (units_);                                                 \
      slot->raw_max = (raw_max_);                                             \
      slot->offset = (offset_);                                               \
   } while (0)

/* Counter whose value is fetched through oa_counter_read_uint64. */
#define EHL_L3_1_U64_COUNTER(read_fn_, name_, desc_, symbol_, category_,      \
                             type_, units_, raw_max_, offset_)                \
   EHL_L3_1_COUNTER(oa_counter_read_uint64, read_fn_, name_, desc_, symbol_,  \
                    category_, type_, INTEL_PERF_COUNTER_DATA_TYPE_UINT64,    \
                    units_, raw_max_, offset_)

/* Counter fetched through oa_counter_read_float and described as a raw
 * percentage with a maximum of 100.0. */
#define EHL_L3_1_PERCENT_COUNTER(read_fn_, name_, desc_, symbol_, category_,  \
                                 offset_)                                     \
   EHL_L3_1_COUNTER(oa_counter_read_float, read_fn_, name_, desc_, symbol_,   \
                    category_, INTEL_PERF_COUNTER_TYPE_RAW,                   \
                    INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,                       \
                    INTEL_PERF_COUNTER_UNITS_PERCENT, 100.0, offset_)

/* Builds the "Metric set L3_1" OA query and adds it to perf->oa_metrics_table.
 *
 * A query info object is allocated with `perf` as its ralloc parent, given
 * its name, symbol name, GUID, OA report format and accumulator offsets, and
 * pointed at the static mux / boolean-counter / flex register programs
 * declared below.  Up to 44 counters are then appended; the eight
 * "Slice0 L3 BankN Active" counters are only added when bit 0 of
 * perf->sys_vars.slice_mask is set.  query->data_size is derived from the
 * offset and size of the last counter appended, and the finished query is
 * inserted into the metrics table keyed by its GUID string.
 */
static void
ehl_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   /* Register programs attached to query->config further down. */
   static const struct intel_perf_query_register_prog l3_1_mux_regs[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x17340000 },
      { .reg = 0x00009888, .val = 0x17740000 },
      { .reg = 0x00009888, .val = 0x17B40000 },
      { .reg = 0x00009888, .val = 0x17F40000 },
      { .reg = 0x00009888, .val = 0x16340000 },
      { .reg = 0x00009888, .val = 0x16740000 },
      { .reg = 0x00009888, .val = 0x16B40000 },
      { .reg = 0x00009888, .val = 0x16F40000 },
      { .reg = 0x00009888, .val = 0x07340017 },
      { .reg = 0x00009888, .val = 0x27340000 },
      { .reg = 0x00009888, .val = 0x09740017 },
      { .reg = 0x00009888, .val = 0x27740000 },
      { .reg = 0x00009888, .val = 0x0BB40017 },
      { .reg = 0x00009888, .val = 0x27B40000 },
      { .reg = 0x00009888, .val = 0x0DF40017 },
      { .reg = 0x00009888, .val = 0x27F40000 },
      { .reg = 0x00009888, .val = 0x0E0000A7 },
      { .reg = 0x00009888, .val = 0x08012000 },
      { .reg = 0x00009888, .val = 0x0A034000 },
      { .reg = 0x00009888, .val = 0x0C038000 },
      { .reg = 0x00009888, .val = 0x1C340017 },
      { .reg = 0x00009888, .val = 0x26340000 },
      { .reg = 0x00009888, .val = 0x1E740017 },
      { .reg = 0x00009888, .val = 0x26740000 },
      { .reg = 0x00009888, .val = 0x02B40017 },
      { .reg = 0x00009888, .val = 0x26B40000 },
      { .reg = 0x00009888, .val = 0x04F40017 },
      { .reg = 0x00009888, .val = 0x26F40000 },
      { .reg = 0x00009888, .val = 0x10040200 },
      { .reg = 0x00009888, .val = 0x0E040055 },
      { .reg = 0x00009888, .val = 0x14050070 },
      { .reg = 0x00009888, .val = 0x04058000 },
      { .reg = 0x00009888, .val = 0x06056000 },
      { .reg = 0x00009888, .val = 0x08055000 },
      { .reg = 0x00009888, .val = 0x0A051000 },
      { .reg = 0x00009888, .val = 0x260600C0 },
      { .reg = 0x00009888, .val = 0x24067E00 },
      { .reg = 0x00009888, .val = 0x02074000 },
      { .reg = 0x00009888, .val = 0x04078000 },
      { .reg = 0x00009888, .val = 0x06124000 },
      { .reg = 0x00009888, .val = 0x08124000 },
      { .reg = 0x00009888, .val = 0x0A124000 },
      { .reg = 0x00009888, .val = 0x0C124000 },
      { .reg = 0x00009888, .val = 0x04138000 },
      { .reg = 0x00009888, .val = 0x06137000 },
      { .reg = 0x00009888, .val = 0x02148000 },
      { .reg = 0x00009888, .val = 0x0414C000 },
      { .reg = 0x00009888, .val = 0x06144000 },
      { .reg = 0x00009888, .val = 0x06154000 },
      { .reg = 0x00009888, .val = 0x08154000 },
      { .reg = 0x00009888, .val = 0x0A154000 },
      { .reg = 0x00009888, .val = 0x0C154000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x1C1F3000 },
      { .reg = 0x00009888, .val = 0x2A1F0061 },
      { .reg = 0x00009888, .val = 0x141F0000 },
      { .reg = 0x00009888, .val = 0x521F0000 },
      { .reg = 0x00009888, .val = 0x541F0024 },
      { .reg = 0x00009888, .val = 0x3A1F8000 },
      { .reg = 0x00009888, .val = 0x4E1F2460 },
      { .reg = 0x00009888, .val = 0x501F0124 },
      { .reg = 0x00009888, .val = 0x4C1F0133 },
      { .reg = 0x00009888, .val = 0x301F0000 },
      { .reg = 0x00009888, .val = 0x401F6060 },
      { .reg = 0x00009888, .val = 0x421F0010 },
      { .reg = 0x00009888, .val = 0x441F0000 },
   };

   static const struct intel_perf_query_register_prog l3_1_b_counter_regs[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0x00800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
   };

   static const struct intel_perf_query_register_prog l3_1_flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00010003 },
      { .reg = 0x0000E658, .val = 0x00012011 },
      { .reg = 0x0000E45C, .val = 0x00051050 },
      { .reg = 0x0000E55C, .val = 0x00053052 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set L3_1";
   query->symbol_name = "L3_1";
   query->guid = "834bfd76-48d7-4552-81fa-ba1f21d6014c";

   /* Room for every counter this function can append (44 at most). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulator layout: each group starts where the previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *slot = query->counters;

   /* The query is described only while data_size is still unset.
    * NOTE(review): `query` was just obtained from rzalloc(), so data_size is
    * presumably always zero at this point and this branch always runs --
    * confirm that rzalloc() zero-fills. */
   if (query->data_size == 0) {
      query->config.mux_regs = l3_1_mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(l3_1_mux_regs);

      query->config.b_counter_regs = l3_1_b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(l3_1_b_counter_regs);

      query->config.flex_regs = l3_1_flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(l3_1_flex_regs);

      /* --- GPU-wide counters --- */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__gpu_time__read,
                           "GPU Time Elapsed",
                           "Time elapsed on the GPU during the measurement. Unit: ns.",
                           "GpuTime",
                           "GPU",
                           INTEL_PERF_COUNTER_TYPE_RAW,
                           INTEL_PERF_COUNTER_UNITS_NS,
                           0 /* undefined */,
                           0);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__gpu_core_clocks__read,
                           "GPU Core Clocks",
                           "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
                           "GpuCoreClocks",
                           "GPU",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_CYCLES,
                           0 /* undefined */,
                           8);

      /* The only counter whose raw_max is computed at registration time. */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__avg_gpu_core_frequency__read,
                           "AVG GPU Core Frequency",
                           "Average GPU Core Frequency in the measurement. Unit: Hz.",
                           "AvgGpuCoreFrequency",
                           "GPU",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_HZ,
                           ehl__l3_1__avg_gpu_core_frequency__max(perf),
                           16);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__gpu_busy__read,
                               "GPU Busy",
                               "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
                               "GpuBusy",
                               "GPU",
                               24);

      /* --- Thread dispatch counts per shader stage --- */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__vs_threads__read,
                           "VS Threads Dispatched",
                           "The total number of vertex shader hardware threads dispatched. Unit: threads.",
                           "VsThreads",
                           "EU Array/Vertex Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           32);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__hs_threads__read,
                           "HS Threads Dispatched",
                           "The total number of hull shader hardware threads dispatched. Unit: threads.",
                           "HsThreads",
                           "EU Array/Hull Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           40);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__ds_threads__read,
                           "DS Threads Dispatched",
                           "The total number of domain shader hardware threads dispatched. Unit: threads.",
                           "DsThreads",
                           "EU Array/Domain Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           48);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__gs_threads__read,
                           "GS Threads Dispatched",
                           "The total number of geometry shader hardware threads dispatched. Unit: threads.",
                           "GsThreads",
                           "EU Array/Geometry Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           56);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__ps_threads__read,
                           "FS Threads Dispatched",
                           "The total number of fragment shader hardware threads dispatched. Unit: threads.",
                           "PsThreads",
                           "EU Array/Fragment Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           64);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__cs_threads__read,
                           "CS Threads Dispatched",
                           "The total number of compute shader hardware threads dispatched. Unit: threads.",
                           "CsThreads",
                           "EU Array/Compute Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_THREADS,
                           0 /* undefined */,
                           72);

      /* --- EU utilisation percentages --- */
      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__eu_active__read,
                               "EU Active",
                               "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
                               "EuActive",
                               "EU Array",
                               80);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__eu_stall__read,
                               "EU Stall",
                               "The percentage of time in which the Execution Units were stalled. Unit: percent.",
                               "EuStall",
                               "EU Array",
                               84);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__eu_fpu_both_active__read,
                               "EU Both FPU Pipes Active",
                               "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.",
                               "EuFpuBothActive",
                               "EU Array/Pipes",
                               88);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__vs_fpu0_active__read,
                               "VS FPU0 Pipe Active",
                               "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                               "VsFpu0Active",
                               "EU Array/Vertex Shader",
                               92);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__vs_fpu1_active__read,
                               "VS FPU1 Pipe Active",
                               "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.",
                               "VsFpu1Active",
                               "EU Array/Vertex Shader",
                               96);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__vs_send_active__read,
                               "VS Send Pipe Active",
                               "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.",
                               "VsSendActive",
                               "EU Array/Vertex Shader",
                               100);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__ps_fpu0_active__read,
                               "PS FPU0 Pipe Active",
                               "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                               "PsFpu0Active",
                               "EU Array/Pixel Shader",
                               104);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__ps_fpu1_active__read,
                               "PS FPU1 Pipe Active",
                               "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.",
                               "PsFpu1Active",
                               "EU Array/Pixel Shader",
                               108);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__ps_send_active__read,
                               "PS Send Pipeline Active",
                               "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.",
                               "PsSendActive",
                               "EU Array/Pixel Shader",
                               112);

      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__ps_eu_both_fpu_active__read,
                               "FS Both FPU Active",
                               "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.",
                               "PsEuBothFpuActive",
                               "3D Pipe/Fragment Shader",
                               116);

      /* --- Pixel / sample pipeline event counts --- */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__rasterized_pixels__read,
                           "Rasterized Pixels",
                           "The total number of rasterized pixels. Unit: pixels.",
                           "RasterizedPixels",
                           "3D Pipe/Rasterizer",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           120);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__hi_depth_test_fails__read,
                           "Early Hi-Depth Test Fails",
                           "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
                           "HiDepthTestFails",
                           "3D Pipe/Rasterizer/Hi-Depth Test",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           128);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__early_depth_test_fails__read,
                           "Early Depth Test Fails",
                           "The total number of pixels dropped on early depth test. Unit: pixels.",
                           "EarlyDepthTestFails",
                           "3D Pipe/Rasterizer/Early Depth Test",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           136);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__samples_killed_in_ps__read,
                           "Samples Killed in FS",
                           "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
                           "SamplesKilledInPs",
                           "3D Pipe/Fragment Shader",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           144);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__pixels_failing_post_ps_tests__read,
                           "Pixels Failing Tests",
                           "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
                           "PixelsFailingPostPsTests",
                           "3D Pipe/Output Merger",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           152);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__samples_written__read,
                           "Samples Written",
                           "The total number of samples or pixels written to all render targets. Unit: pixels.",
                           "SamplesWritten",
                           "3D Pipe/Output Merger",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           160);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__samples_blended__read,
                           "Samples Blended",
                           "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
                           "SamplesBlended",
                           "3D Pipe/Output Merger",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_PIXELS,
                           0 /* undefined */,
                           168);

      /* --- Sampler counters --- */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__sampler_texels__read,
                           "Sampler Texels",
                           "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
                           "SamplerTexels",
                           "Sampler/Sampler Input",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_TEXELS,
                           0 /* undefined */,
                           176);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__sampler_texel_misses__read,
                           "Sampler Texels Misses",
                           "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
                           "SamplerTexelMisses",
                           "Sampler/Sampler Cache",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_TEXELS,
                           0 /* undefined */,
                           184);

      /* --- Shared local memory / L3 data port counters --- */
      EHL_L3_1_U64_COUNTER(ehl__l3_1__slm_bytes_read__read,
                           "SLM Bytes Read",
                           "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
                           "SlmBytesRead",
                           "L3/Data Port/SLM",
                           INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                           INTEL_PERF_COUNTER_UNITS_BYTES,
                           0 /* unsupported (varies over time) */,
                           192);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__slm_bytes_written__read,
                           "SLM Bytes Written",
                           "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
                           "SlmBytesWritten",
                           "L3/Data Port/SLM",
                           INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                           INTEL_PERF_COUNTER_UNITS_BYTES,
                           0 /* unsupported (varies over time) */,
                           200);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__shader_memory_accesses__read,
                           "Shader Memory Accesses",
                           "The total number of shader memory accesses to L3. Unit: messages.",
                           "ShaderMemoryAccesses",
                           "L3/Data Port",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_MESSAGES,
                           0 /* undefined */,
                           208);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__shader_atomics__read,
                           "Shader Atomic Memory Accesses",
                           "The total number of shader atomic memory accesses. Unit: messages.",
                           "ShaderAtomics",
                           "L3/Data Port/Atomics",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_MESSAGES,
                           0 /* undefined */,
                           216);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__l3_shader_throughput__read,
                           "L3 Shader Throughput",
                           "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
                           "L3ShaderThroughput",
                           "L3/Data Port",
                           INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
                           INTEL_PERF_COUNTER_UNITS_BYTES,
                           0 /* unsupported (varies over time) */,
                           224);

      EHL_L3_1_U64_COUNTER(ehl__l3_1__shader_barriers__read,
                           "Shader Barrier Messages",
                           "The total number of shader barrier messages. Unit: messages.",
                           "ShaderBarriers",
                           "EU Array/Barrier",
                           INTEL_PERF_COUNTER_TYPE_EVENT,
                           INTEL_PERF_COUNTER_UNITS_MESSAGES,
                           0 /* undefined */,
                           232);

      /* --- Per-bank L3 activity, only when bit 0 of the slice mask is set.
       * All eight counters share the same condition and nothing in between
       * writes the mask, so a single check covers them. --- */
      if (perf->sys_vars.slice_mask & 1) {
         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank0_active__read,
                                  "Slice0 L3 Bank0 Active",
                                  "The percentage of time in which slice0 L3 bank0 is active Unit: percent.",
                                  "L30Bank0Active",
                                  "GTI/L3",
                                  240);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank1_active__read,
                                  "Slice0 L3 Bank1 Active",
                                  "The percentage of time in which slice0 L3 bank1 is active Unit: percent.",
                                  "L30Bank1Active",
                                  "GTI/L3",
                                  244);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank2_active__read,
                                  "Slice0 L3 Bank2 Active",
                                  "The percentage of time in which slice0 L3 bank2 is active Unit: percent.",
                                  "L30Bank2Active",
                                  "GTI/L3",
                                  248);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank3_active__read,
                                  "Slice0 L3 Bank3 Active",
                                  "The percentage of time in which slice0 L3 bank3 is active Unit: percent.",
                                  "L30Bank3Active",
                                  "GTI/L3",
                                  252);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank4_active__read,
                                  "Slice0 L3 Bank4 Active",
                                  "The percentage of time in which slice0 L3 bank4 is active Unit: percent.",
                                  "L30Bank4Active",
                                  "GTI/L3",
                                  256);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank5_active__read,
                                  "Slice0 L3 Bank5 Active",
                                  "The percentage of time in which slice0 L3 bank5 is active Unit: percent.",
                                  "L30Bank5Active",
                                  "GTI/L3",
                                  260);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank6_active__read,
                                  "Slice0 L3 Bank6 Active",
                                  "The percentage of time in which slice0 L3 bank6 is active Unit: percent.",
                                  "L30Bank6Active",
                                  "GTI/L3",
                                  264);

         EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__l30_bank7_active__read,
                                  "Slice0 L3 Bank7 Active",
                                  "The percentage of time in which slice0 L3 bank7 is active Unit: percent.",
                                  "L30Bank7Active",
                                  "GTI/L3",
                                  268);
      }

      /* Always appended last; data_size below is derived from this entry. */
      EHL_L3_1_PERCENT_COUNTER(ehl__l3_1__gt_request_queue_full__read,
                               "SQ is full",
                               "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.",
                               "GTRequestQueueFull",
                               "GTI",
                               272);

      /* Total result size: end of the last counter that was appended. */
      query->data_size = slot->offset + intel_perf_query_counter_get_size(slot);
   }

   /* Publish the query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef EHL_L3_1_PERCENT_COUNTER
#undef EHL_L3_1_U64_COUNTER
#undef EHL_L3_1_COUNTER


/* Register the EHL "L3_2" OA metric set.
 *
 * Allocates a zeroed query descriptor (ralloc child of `perf`), attaches the
 * mux / B-counter / flex register programming tables defined below, fills in
 * one descriptor per exposed counter and finally inserts the query into
 * perf->oa_metrics_table keyed by its GUID.
 *
 * The L30Bank0Stalled counter is only added when bit 0 of
 * perf->sys_vars.slice_mask is set; query->data_size is derived from
 * whichever counter ends up being registered last.
 */
static void
ehl_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables referenced from qinfo->config. */
   static const struct intel_perf_query_register_prog l3_2_mux[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x10050C00 },
      { .reg = 0x00009888, .val = 0x12050002 },
      { .reg = 0x00009888, .val = 0x00050025 },
      { .reg = 0x00009888, .val = 0x06050900 },
      { .reg = 0x00009888, .val = 0x080508EA },
      { .reg = 0x00009888, .val = 0x0A0508AB },
      { .reg = 0x00009888, .val = 0x0C050A21 },
      { .reg = 0x00009888, .val = 0x0E050A60 },
      { .reg = 0x00009888, .val = 0x04050000 },
      { .reg = 0x00009888, .val = 0x14050000 },
      { .reg = 0x00009888, .val = 0x24068100 },
      { .reg = 0x00009888, .val = 0x260600FF },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x301F8000 },
      { .reg = 0x00009888, .val = 0x501F4800 },
      { .reg = 0x00009888, .val = 0x521F4924 },
      { .reg = 0x00009888, .val = 0x541F0024 },
      { .reg = 0x00009888, .val = 0x3E1F0000 },
      { .reg = 0x00009888, .val = 0x461F0000 },
      { .reg = 0x00009888, .val = 0x481F0000 },
      { .reg = 0x00009888, .val = 0x4A1F0000 },
      { .reg = 0x00009888, .val = 0x4C1F0000 },
      { .reg = 0x00009888, .val = 0x141F0000 },
      { .reg = 0x00009888, .val = 0x4E1F0000 },
   };

   static const struct intel_perf_query_register_prog l3_2_b_counters[] = {
      { .reg = 0x00002740, .val = 0x00000000 },
      { .reg = 0x00002710, .val = 0x00000000 },
      { .reg = 0x00002714, .val = 0xF0800000 },
      { .reg = 0x00002720, .val = 0x00000000 },
      { .reg = 0x00002724, .val = 0x00800000 },
      { .reg = 0x00002770, .val = 0x00000022 },
      { .reg = 0x00002774, .val = 0x0000FFF8 },
      { .reg = 0x00002778, .val = 0x00000082 },
      { .reg = 0x0000277C, .val = 0x0000FFE7 },
      { .reg = 0x00002780, .val = 0x00000402 },
      { .reg = 0x00002784, .val = 0x0000FF1F },
      { .reg = 0x00002788, .val = 0x00001002 },
      { .reg = 0x0000278C, .val = 0x0000FCFF },
   };

   static const struct intel_perf_query_register_prog l3_2_flex[] = {
      { .reg = 0x0000E458, .val = 0x00005004 },
      { .reg = 0x0000E558, .val = 0x00008003 },
      { .reg = 0x0000E658, .val = 0x00011010 },
      { .reg = 0x0000E758, .val = 0x00050012 },
      { .reg = 0x0000E45C, .val = 0x00052051 },
      { .reg = 0x0000E55C, .val = 0x00000053 },
   };

   struct intel_perf_query_info *qinfo = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   qinfo->perf = perf;
   qinfo->kind = INTEL_PERF_QUERY_TYPE_OA;
   qinfo->name = "L2Bank0 stalled metric set";
   qinfo->symbol_name = "L3_2";
   qinfo->guid = "f3da54bc-da93-4008-bafc-daebdd79ab40";

   /* Room for every counter below, including the slice-dependent one. */
   qinfo->counters = rzalloc_array(qinfo, struct intel_perf_query_counter, 29);
   qinfo->n_counters = 0;

   qinfo->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   qinfo->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: each section starts where the previous
    * one ends. */
   qinfo->gpu_time_offset = 0;
   qinfo->gpu_clock_offset = qinfo->gpu_time_offset + 1;
   qinfo->a_offset = qinfo->gpu_clock_offset + 1;
   qinfo->b_offset = qinfo->a_offset + 36;
   qinfo->c_offset = qinfo->b_offset + 8;
   qinfo->perfcnt_offset = qinfo->c_offset + 8;
   qinfo->rpstat_offset = qinfo->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = qinfo->counters;

   /* Only populate the description while data_size is still unset. */
   if (!qinfo->data_size) {
      qinfo->config.mux_regs = l3_2_mux;
      qinfo->config.n_mux_regs = ARRAY_SIZE(l3_2_mux);

      qinfo->config.b_counter_regs = l3_2_b_counters;
      qinfo->config.n_b_counter_regs = ARRAY_SIZE(l3_2_b_counters);

      qinfo->config.flex_regs = l3_2_flex;
      qinfo->config.n_flex_regs = ARRAY_SIZE(l3_2_flex);

      /* ---- GPU-wide counters ---- */

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = ehl__l3_2__gpu_time__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = ehl__l3_2__gpu_core_clocks__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = ehl__l3_2__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = ehl__l3_2__avg_gpu_core_frequency__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = ehl__l3_2__gpu_busy__read;

      /* ---- Thread dispatch counts per shader stage ---- */

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = ehl__l3_2__vs_threads__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = ehl__l3_2__hs_threads__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = ehl__l3_2__ds_threads__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = ehl__l3_2__gs_threads__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = ehl__l3_2__ps_threads__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = ehl__l3_2__cs_threads__read;

      /* ---- 3D pipe pixel / sample counts ---- */

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "RasterizedPixels";
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;
      ctr->oa_counter_read_uint64 = ehl__l3_2__rasterized_pixels__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "HiDepthTestFails";
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 88;
      ctr->oa_counter_read_uint64 = ehl__l3_2__hi_depth_test_fails__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 96;
      ctr->oa_counter_read_uint64 = ehl__l3_2__early_depth_test_fails__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 104;
      ctr->oa_counter_read_uint64 = ehl__l3_2__samples_killed_in_ps__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 112;
      ctr->oa_counter_read_uint64 = ehl__l3_2__pixels_failing_post_ps_tests__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "SamplesWritten";
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;
      ctr->oa_counter_read_uint64 = ehl__l3_2__samples_written__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "SamplesBlended";
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;
      ctr->oa_counter_read_uint64 = ehl__l3_2__samples_blended__read;

      /* ---- EU array utilisation percentages ---- */

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 136;
      ctr->oa_counter_read_float = ehl__l3_2__eu_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 140;
      ctr->oa_counter_read_float = ehl__l3_2__eu_stall__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "EuFpuBothActive";
      ctr->name = "EU Both FPU Pipes Active";
      ctr->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      ctr->category = "EU Array/Pipes";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 144;
      ctr->oa_counter_read_float = ehl__l3_2__eu_fpu_both_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 148;
      ctr->oa_counter_read_float = ehl__l3_2__eu_thread_occupancy__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "VsFpu0Active";
      ctr->name = "VS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 152;
      ctr->oa_counter_read_float = ehl__l3_2__vs_fpu0_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "VsFpu1Active";
      ctr->name = "VS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 156;
      ctr->oa_counter_read_float = ehl__l3_2__vs_fpu1_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "VsSendActive";
      ctr->name = "VS Send Pipe Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 160;
      ctr->oa_counter_read_float = ehl__l3_2__vs_send_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PsFpu0Active";
      ctr->name = "PS FPU0 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 164;
      ctr->oa_counter_read_float = ehl__l3_2__ps_fpu0_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PsFpu1Active";
      ctr->name = "PS FPU1 Pipe Active";
      ctr->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 168;
      ctr->oa_counter_read_float = ehl__l3_2__ps_fpu1_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PsSendActive";
      ctr->name = "PS Send Pipeline Active";
      ctr->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      ctr->category = "EU Array/Pixel Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 172;
      ctr->oa_counter_read_float = ehl__l3_2__ps_send_active__read;

      ctr = qinfo->counters + qinfo->n_counters++;
      ctr->symbol_name = "PsEuBothFpuActive";
      ctr->name = "FS Both FPU Active";
      ctr->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 176;
      ctr->oa_counter_read_float = ehl__l3_2__ps_eu_both_fpu_active__read;

      /* ---- Slice-dependent L3 counter (needs bit 0 of slice_mask) ---- */

      if ((perf->sys_vars.slice_mask & 1) != 0) {
         ctr = qinfo->counters + qinfo->n_counters++;
         ctr->symbol_name = "L30Bank0Stalled";
         ctr->name = "Slice0 L3 Bank0 Stalled";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 is stalled Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 180;
         ctr->oa_counter_read_float = ehl__l3_2__l30_bank0_stalled__read;
      }

      /* ctr still points at the last counter registered above, so this is
       * the end of the final counter's slot. */
      qinfo->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, qinfo->guid, qinfo);
}


static void
ehl_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank1 stalled metric set";
   query->symbol_name = "L3_3";
   query->guid = "fb4664db-4a4b-4d45-8f38-ceab6eac078c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10050400 },
         { .reg = 0x00009888, .val = 0x12050000 },
         { .reg = 0x00009888, .val = 0x00050025 },
         { .reg = 0x00009888, .val = 0x06050900 },
         { .reg = 0x00009888, .val = 0x080508EA },
         { .reg = 0x00009888, .val = 0x0A0508AB },
         { .reg = 0x00009888, .val = 0x0C050A21 },
         { .reg = 0x00009888, .val = 0x0E050A60 },
         { .reg = 0x00009888, .val = 0x04050000 },
         { .reg = 0x00009888, .val = 0x14050000 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__l3_3__l30_bank1_stalled__read;
         counter->name = "Slice0 L3 Bank1 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank1 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank1Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank4 stalled metric set";
   query->symbol_name = "L3_4";
   query->guid = "0092ff25-7595-4671-b21a-d7d94a19a38b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10010C00 },
         { .reg = 0x00009888, .val = 0x12010002 },
         { .reg = 0x00009888, .val = 0x0C000400 },
         { .reg = 0x00009888, .val = 0x0E005500 },
         { .reg = 0x00009888, .val = 0x10000155 },
         { .reg = 0x00009888, .val = 0x00010025 },
         { .reg = 0x00009888, .val = 0x06010900 },
         { .reg = 0x00009888, .val = 0x080108EA },
         { .reg = 0x00009888, .val = 0x0A0108AB },
         { .reg = 0x00009888, .val = 0x0C010A21 },
         { .reg = 0x00009888, .val = 0x0E010A60 },
         { .reg = 0x00009888, .val = 0x04010000 },
         { .reg = 0x00009888, .val = 0x14010000 },
         { .reg = 0x00009888, .val = 0x0C040400 },
         { .reg = 0x00009888, .val = 0x0E045500 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A054000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x18125540 },
         { .reg = 0x00009888, .val = 0x1A120015 },
         { .reg = 0x00009888, .val = 0x04131000 },
         { .reg = 0x00009888, .val = 0x06138000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x00144000 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x0814C000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x16152AA8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_4__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_4__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__l3_4__l30_bank4_stalled__read;
         counter->name = "Slice0 L3 Bank4 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank4 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank4Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the "L3_5" OA metric set description and register it with perf.
 *
 * Allocates a new intel_perf_query_info (with perf as the ralloc context),
 * fills in its identity (name, symbol name, GUID, OA report format), the
 * accumulator offsets, the three register programming tables (mux,
 * B-counter, flex) and the list of counters, then inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * perf: configuration the query is attached to; perf->sys_vars.slice_mask
 *       is consulted to decide whether the slice0 L3 bank5 counter is
 *       exposed.
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are dereferenced
 * without a NULL check.
 */
static void
ehl_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L2Bank5 stalled metric set";
   query->symbol_name = "L3_5";
   query->guid = "079c833a-21cb-4832-bc9a-3ea6fc42f516";
   /* 29 slots: the 28 counters registered unconditionally below plus the
    * one counter that is gated on slice_mask. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is derived from the previous one, giving the layout:
    * gpu_time = 0, gpu_clock = 1, a = 2, b = 38, c = 46, perfcnt = 54,
    * rpstat = 56. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query was allocated just above by rzalloc(), so
    * data_size is presumably still zero here and this branch is always
    * taken - confirm before relying on the "initialized once" wording. */
   if (!query->data_size) {
      /* Register/value pairs exposed through query->config.mux_regs.
       * The same register (0x9888) is listed many times with different
       * values and 0x9884 is written twice, so the entry order is
       * presumably significant - do not sort or de-duplicate. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x10010400 },
         { .reg = 0x00009888, .val = 0x12010000 },
         { .reg = 0x00009888, .val = 0x0C000400 },
         { .reg = 0x00009888, .val = 0x0E005500 },
         { .reg = 0x00009888, .val = 0x10000155 },
         { .reg = 0x00009888, .val = 0x00010022 },
         { .reg = 0x00009888, .val = 0x06010840 },
         { .reg = 0x00009888, .val = 0x08010828 },
         { .reg = 0x00009888, .val = 0x0A010969 },
         { .reg = 0x00009888, .val = 0x0C010AA4 },
         { .reg = 0x00009888, .val = 0x0E010AE3 },
         { .reg = 0x00009888, .val = 0x04010000 },
         { .reg = 0x00009888, .val = 0x14010000 },
         { .reg = 0x00009888, .val = 0x0C040400 },
         { .reg = 0x00009888, .val = 0x0E045500 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04051000 },
         { .reg = 0x00009888, .val = 0x0A054000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x24068100 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x18125540 },
         { .reg = 0x00009888, .val = 0x1A120015 },
         { .reg = 0x00009888, .val = 0x04131000 },
         { .reg = 0x00009888, .val = 0x06138000 },
         { .reg = 0x00009888, .val = 0x0813F000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x00144000 },
         { .reg = 0x00009888, .val = 0x06148000 },
         { .reg = 0x00009888, .val = 0x0814C000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00154000 },
         { .reg = 0x00009888, .val = 0x0E154000 },
         { .reg = 0x00009888, .val = 0x16152AA8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4800 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3E1F0000 },
         { .reg = 0x00009888, .val = 0x461F0000 },
         { .reg = 0x00009888, .val = 0x481F0000 },
         { .reg = 0x00009888, .val = 0x4A1F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x4E1F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs exposed through query->config.b_counter_regs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x00000022 },
         { .reg = 0x00002774, .val = 0x0000FFF8 },
         { .reg = 0x00002778, .val = 0x00000082 },
         { .reg = 0x0000277C, .val = 0x0000FFE7 },
         { .reg = 0x00002780, .val = 0x00000402 },
         { .reg = 0x00002784, .val = 0x0000FF1F },
         { .reg = 0x00002788, .val = 0x00001002 },
         { .reg = 0x0000278C, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs exposed through query->config.flex_regs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00008003 },
         { .reg = 0x0000E658, .val = 0x00011010 },
         { .reg = 0x0000E758, .val = 0x00050012 },
         { .reg = 0x0000E45C, .val = 0x00052051 },
         { .reg = 0x0000E55C, .val = 0x00000053 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions. Each one takes the next free slot in
       * query->counters (n_counters is post-incremented) and records the
       * ehl__l3_5__* read callback, the user-visible strings, the
       * type/data type/units, the maximum value and the counter's offset.
       * The offset of the last counter registered is what determines
       * query->data_size at the end of this block, so registration order
       * matters. */

      /* GPU-wide time, clock and busy counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* Unlike the other counters, the maximum is computed from perf by a
       * dedicated helper rather than being a literal. */
      counter->raw_max = ehl__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-stage hardware thread dispatch counts. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Pixel/sample counts from the 3D pipeline. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__l3_5__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      /* Percentage counters (float readers, raw_max of 100.0). From here
       * on the offsets advance in steps of 4 instead of 8. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 156;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 164;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 172;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__l3_5__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 176;

      /* The metric this set is named after is only registered when bit 0
       * of the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__l3_5__l30_bank5_stalled__read;
         counter->name = "Slice0 L3 Bank5 Stalled";
         counter->desc = "The percentage of time in which slice0 L3 bank5 is stalled Unit: percent.";
         counter->symbol_name = "L30Bank5Stalled";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 180;
      }

      /* counter still points at the last counter registered above (offset
       * 180 if the slice-gated counter was added, otherwise 176), so the
       * total size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler 1";
   query->symbol_name = "Sampler_1";
   query->guid = "bf151068-b27b-4851-b46c-627c132f1cb4";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142A0165 },
         { .reg = 0x00009888, .val = 0x142F0165 },
         { .reg = 0x00009888, .val = 0x146A0165 },
         { .reg = 0x00009888, .val = 0x146F0165 },
         { .reg = 0x00009888, .val = 0x14AA0165 },
         { .reg = 0x00009888, .val = 0x14AF0165 },
         { .reg = 0x00009888, .val = 0x14EA0165 },
         { .reg = 0x00009888, .val = 0x14EF0165 },
         { .reg = 0x00009888, .val = 0x161E8000 },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1C1E1800 },
         { .reg = 0x00009888, .val = 0x06204000 },
         { .reg = 0x00009888, .val = 0x08204000 },
         { .reg = 0x00009888, .val = 0x0A204000 },
         { .reg = 0x00009888, .val = 0x0C204000 },
         { .reg = 0x00009888, .val = 0x16218000 },
         { .reg = 0x00009888, .val = 0x18218000 },
         { .reg = 0x00009888, .val = 0x1E210018 },
         { .reg = 0x00009888, .val = 0x042A1800 },
         { .reg = 0x00009888, .val = 0x062AC038 },
         { .reg = 0x00009888, .val = 0x102A0000 },
         { .reg = 0x00009888, .val = 0x082AC000 },
         { .reg = 0x00009888, .val = 0x0A2A0000 },
         { .reg = 0x00009888, .val = 0x0C2A0000 },
         { .reg = 0x00009888, .val = 0x062C4000 },
         { .reg = 0x00009888, .val = 0x082C4000 },
         { .reg = 0x00009888, .val = 0x0A2C4000 },
         { .reg = 0x00009888, .val = 0x0C2C4000 },
         { .reg = 0x00009888, .val = 0x022F2000 },
         { .reg = 0x00009888, .val = 0x042F0048 },
         { .reg = 0x00009888, .val = 0x102F0000 },
         { .reg = 0x00009888, .val = 0x062F0000 },
         { .reg = 0x00009888, .val = 0x082F0000 },
         { .reg = 0x00009888, .val = 0x1E5E0030 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x1A604000 },
         { .reg = 0x00009888, .val = 0x1C600001 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x1E613000 },
         { .reg = 0x00009888, .val = 0x12618000 },
         { .reg = 0x00009888, .val = 0x14618000 },
         { .reg = 0x00009888, .val = 0x006A1800 },
         { .reg = 0x00009888, .val = 0x026A0038 },
         { .reg = 0x00009888, .val = 0x106A0000 },
         { .reg = 0x00009888, .val = 0x1A6A00F0 },
         { .reg = 0x00009888, .val = 0x046A0000 },
         { .reg = 0x00009888, .val = 0x186C5000 },
         { .reg = 0x00009888, .val = 0x026C4000 },
         { .reg = 0x00009888, .val = 0x046C4000 },
         { .reg = 0x00009888, .val = 0x0E6F2440 },
         { .reg = 0x00009888, .val = 0x106F0000 },
         { .reg = 0x00009888, .val = 0x1A6F0000 },
         { .reg = 0x00009888, .val = 0x1E9E000F },
         { .reg = 0x00009888, .val = 0x1AA01540 },
         { .reg = 0x00009888, .val = 0x1EA10F00 },
         { .reg = 0x00009888, .val = 0x0CAA1C30 },
         { .reg = 0x00009888, .val = 0x10AA0000 },
         { .reg = 0x00009888, .val = 0x18AAF000 },
         { .reg = 0x00009888, .val = 0x1AAA0000 },
         { .reg = 0x00009888, .val = 0x18AC0550 },
         { .reg = 0x00009888, .val = 0x0AAF2440 },
         { .reg = 0x00009888, .val = 0x10AF0000 },
         { .reg = 0x00009888, .val = 0x1AAF0000 },
         { .reg = 0x00009888, .val = 0x10DE8000 },
         { .reg = 0x00009888, .val = 0x1CDEE000 },
         { .reg = 0x00009888, .val = 0x00E04000 },
         { .reg = 0x00009888, .val = 0x0EE04000 },
         { .reg = 0x00009888, .val = 0x18E04000 },
         { .reg = 0x00009888, .val = 0x1AE00010 },
         { .reg = 0x00009888, .val = 0x10E18000 },
         { .reg = 0x00009888, .val = 0x1EE100E0 },
         { .reg = 0x00009888, .val = 0x08EA1C30 },
         { .reg = 0x00009888, .val = 0x10EA0000 },
         { .reg = 0x00009888, .val = 0x00EAC000 },
         { .reg = 0x00009888, .val = 0x0EEAC000 },
         { .reg = 0x00009888, .val = 0x18EA0000 },
         { .reg = 0x00009888, .val = 0x00EC4000 },
         { .reg = 0x00009888, .val = 0x0EEC4000 },
         { .reg = 0x00009888, .val = 0x18EC0005 },
         { .reg = 0x00009888, .val = 0x00EF0040 },
         { .reg = 0x00009888, .val = 0x06EF2400 },
         { .reg = 0x00009888, .val = 0x10EF0000 },
         { .reg = 0x00009888, .val = 0x0EEF0000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0E08A000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x14090050 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x040E1000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0C0E5000 },
         { .reg = 0x00009888, .val = 0x1812FC00 },
         { .reg = 0x00009888, .val = 0x1A12002B },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x00158000 },
         { .reg = 0x00009888, .val = 0x0E158000 },
         { .reg = 0x00009888, .val = 0x16152AD0 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x02188000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x0A198000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1CA000 },
         { .reg = 0x00009888, .val = 0x0C1CA000 },
         { .reg = 0x00009888, .val = 0x121D5400 },
         { .reg = 0x00009888, .val = 0x141D0002 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F3000 },
         { .reg = 0x00009888, .val = 0x461F4030 },
         { .reg = 0x00009888, .val = 0x481F0040 },
         { .reg = 0x00009888, .val = 0x4A1F1000 },
         { .reg = 0x00009888, .val = 0x4C1F0010 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F1010 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F4040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000018 },
         { .reg = 0x00002774, .val = 0x0000FFFC },
         { .reg = 0x00002778, .val = 0x00000060 },
         { .reg = 0x0000277C, .val = 0x0000FFF3 },
         { .reg = 0x00002780, .val = 0x00000180 },
         { .reg = 0x00002784, .val = 0x0000FFCF },
         { .reg = 0x00002788, .val = 0x00000600 },
         { .reg = 0x0000278C, .val = 0x0000FF3F },
         { .reg = 0x00002790, .val = 0x00001800 },
         { .reg = 0x00002794, .val = 0x0000FCFF },
         { .reg = 0x00002798, .val = 0x00006000 },
         { .reg = 0x0000279C, .val = 0x0000F3FF },
         { .reg = 0x000027A0, .val = 0x00018000 },
         { .reg = 0x000027A4, .val = 0x0000CFFF },
         { .reg = 0x000027A8, .val = 0x00060000 },
         { .reg = 0x000027AC, .val = 0x00003FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 Subslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 Subslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler02_input_available__read;
         counter->name = "Slice0 Subslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler03_input_available__read;
         counter->name = "Slice0 Subslice3 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice3 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler03InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler04_input_available__read;
         counter->name = "Slice0 Subslice4 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice4 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler04InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler05_input_available__read;
         counter->name = "Slice0 Subslice5 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice5 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler05InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler06_input_available__read;
         counter->name = "Slice0 Subslice6 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice6 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler06InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_1__sampler07_input_available__read;
         counter->name = "Slice0 Subslice7 Input Available";
         counter->desc = "The percentage of time in which slice0 subslice7 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler07InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set Sampler 2";
   query->symbol_name = "Sampler_2";
   query->guid = "5e64ae48-92a7-49b3-b7d8-af36b32ee866";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x142D0005 },
         { .reg = 0x00009888, .val = 0x14320005 },
         { .reg = 0x00009888, .val = 0x146D0005 },
         { .reg = 0x00009888, .val = 0x14720005 },
         { .reg = 0x00009888, .val = 0x14AD0005 },
         { .reg = 0x00009888, .val = 0x14B20005 },
         { .reg = 0x00009888, .val = 0x14ED0005 },
         { .reg = 0x00009888, .val = 0x14F20005 },
         { .reg = 0x00009888, .val = 0x1E1E0030 },
         { .reg = 0x00009888, .val = 0x1A204000 },
         { .reg = 0x00009888, .val = 0x1C200001 },
         { .reg = 0x00009888, .val = 0x1E213000 },
         { .reg = 0x00009888, .val = 0x1A2A00C0 },
         { .reg = 0x00009888, .val = 0x182C6000 },
         { .reg = 0x00009888, .val = 0x0E2D0033 },
         { .reg = 0x00009888, .val = 0x022D0000 },
         { .reg = 0x00009888, .val = 0x1A2F8000 },
         { .reg = 0x00009888, .val = 0x1C300008 },
         { .reg = 0x00009888, .val = 0x0E318000 },
         { .reg = 0x00009888, .val = 0x0E321980 },
         { .reg = 0x00009888, .val = 0x02320000 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x12618000 },
         { .reg = 0x00009888, .val = 0x14618000 },
         { .reg = 0x00009888, .val = 0x046AC000 },
         { .reg = 0x00009888, .val = 0x026C8000 },
         { .reg = 0x00009888, .val = 0x046C4000 },
         { .reg = 0x00009888, .val = 0x006D1980 },
         { .reg = 0x00009888, .val = 0x026D0000 },
         { .reg = 0x00009888, .val = 0x046F8000 },
         { .reg = 0x00009888, .val = 0x14708000 },
         { .reg = 0x00009888, .val = 0x02714000 },
         { .reg = 0x00009888, .val = 0x02720033 },
         { .reg = 0x00009888, .val = 0x169E8000 },
         { .reg = 0x00009888, .val = 0x189E8000 },
         { .reg = 0x00009888, .val = 0x06A04000 },
         { .reg = 0x00009888, .val = 0x08A04000 },
         { .reg = 0x00009888, .val = 0x16A18000 },
         { .reg = 0x00009888, .val = 0x18A18000 },
         { .reg = 0x00009888, .val = 0x08AAC000 },
         { .reg = 0x00009888, .val = 0x06AC8000 },
         { .reg = 0x00009888, .val = 0x08AC4000 },
         { .reg = 0x00009888, .val = 0x02AD1980 },
         { .reg = 0x00009888, .val = 0x08AF8000 },
         { .reg = 0x00009888, .val = 0x18B04000 },
         { .reg = 0x00009888, .val = 0x04B14000 },
         { .reg = 0x00009888, .val = 0x04B20033 },
         { .reg = 0x00009888, .val = 0x02B20000 },
         { .reg = 0x00009888, .val = 0x1CDE1800 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x0CE04000 },
         { .reg = 0x00009888, .val = 0x1EE10018 },
         { .reg = 0x00009888, .val = 0x0CEAC000 },
         { .reg = 0x00009888, .val = 0x0AEC8000 },
         { .reg = 0x00009888, .val = 0x0CEC4000 },
         { .reg = 0x00009888, .val = 0x04ED1980 },
         { .reg = 0x00009888, .val = 0x02ED0000 },
         { .reg = 0x00009888, .val = 0x0CEF8000 },
         { .reg = 0x00009888, .val = 0x1AF00400 },
         { .reg = 0x00009888, .val = 0x06F14000 },
         { .reg = 0x00009888, .val = 0x06F20033 },
         { .reg = 0x00009888, .val = 0x02F20000 },
         { .reg = 0x00009888, .val = 0x10040140 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040055 },
         { .reg = 0x00009888, .val = 0x14050050 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x0A051000 },
         { .reg = 0x00009888, .val = 0x260600C0 },
         { .reg = 0x00009888, .val = 0x24067E00 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x040D8000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x080E4000 },
         { .reg = 0x00009888, .val = 0x0A0E1000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0812C000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x06131000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x04144000 },
         { .reg = 0x00009888, .val = 0x1815000F },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08154000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C158000 },
         { .reg = 0x00009888, .val = 0x0E18A000 },
         { .reg = 0x00009888, .val = 0x14190028 },
         { .reg = 0x00009888, .val = 0x021C8000 },
         { .reg = 0x00009888, .val = 0x041C2000 },
         { .reg = 0x00009888, .val = 0x0A1D8000 },
         { .reg = 0x00009888, .val = 0x0C1D2000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3000 },
         { .reg = 0x00009888, .val = 0x2A1F0061 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x521F0000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2430 },
         { .reg = 0x00009888, .val = 0x501F0124 },
         { .reg = 0x00009888, .val = 0x4C1F0137 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F0040 },
         { .reg = 0x00009888, .val = 0x421F0040 },
         { .reg = 0x00009888, .val = 0x441F3001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__sampler_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 Subslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 Subslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 Subslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler03_output_ready__read;
         counter->name = "Slice0 Subslice3 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice3 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler03OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler04_output_ready__read;
         counter->name = "Slice0 Subslice4 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice4 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler04OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler05_output_ready__read;
         counter->name = "Slice0 Subslice5 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice5 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler05OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler06_output_ready__read;
         counter->name = "Slice0 Subslice6 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice6 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler06OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__sampler_2__sampler07_output_ready__read;
         counter->name = "Slice0 Subslice7 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 subslice7 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler07OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__sampler_2__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "d1725c84-80c6-4488-9867-82b44c88fea6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 51);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14290000 },
         { .reg = 0x00009888, .val = 0x142E0000 },
         { .reg = 0x00009888, .val = 0x14690000 },
         { .reg = 0x00009888, .val = 0x146E0000 },
         { .reg = 0x00009888, .val = 0x14A90000 },
         { .reg = 0x00009888, .val = 0x14AE0000 },
         { .reg = 0x00009888, .val = 0x14E90000 },
         { .reg = 0x00009888, .val = 0x14EE0000 },
         { .reg = 0x00009888, .val = 0x161E8000 },
         { .reg = 0x00009888, .val = 0x181E8000 },
         { .reg = 0x00009888, .val = 0x1C1E1800 },
         { .reg = 0x00009888, .val = 0x06204000 },
         { .reg = 0x00009888, .val = 0x08204000 },
         { .reg = 0x00009888, .val = 0x0A204000 },
         { .reg = 0x00009888, .val = 0x0C204000 },
         { .reg = 0x00009888, .val = 0x16218000 },
         { .reg = 0x00009888, .val = 0x18218000 },
         { .reg = 0x00009888, .val = 0x1E210018 },
         { .reg = 0x00009888, .val = 0x0429C300 },
         { .reg = 0x00009888, .val = 0x062900C5 },
         { .reg = 0x00009888, .val = 0x10290000 },
         { .reg = 0x00009888, .val = 0x062AC000 },
         { .reg = 0x00009888, .val = 0x082AC000 },
         { .reg = 0x00009888, .val = 0x0A2A4000 },
         { .reg = 0x00009888, .val = 0x0C2A4000 },
         { .reg = 0x00009888, .val = 0x062C4000 },
         { .reg = 0x00009888, .val = 0x082C4000 },
         { .reg = 0x00009888, .val = 0x0A2C4000 },
         { .reg = 0x00009888, .val = 0x0C2C4000 },
         { .reg = 0x00009888, .val = 0x022EC300 },
         { .reg = 0x00009888, .val = 0x042E00C5 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x062F4000 },
         { .reg = 0x00009888, .val = 0x082F4000 },
         { .reg = 0x00009888, .val = 0x1E5E0030 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x145E8000 },
         { .reg = 0x00009888, .val = 0x1A604000 },
         { .reg = 0x00009888, .val = 0x1C600001 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x04604000 },
         { .reg = 0x00009888, .val = 0x1E613000 },
         { .reg = 0x00009888, .val = 0x12618000 },
         { .reg = 0x00009888, .val = 0x14618000 },
         { .reg = 0x00009888, .val = 0x0069C300 },
         { .reg = 0x00009888, .val = 0x026900C5 },
         { .reg = 0x00009888, .val = 0x10690000 },
         { .reg = 0x00009888, .val = 0x1A6A00F0 },
         { .reg = 0x00009888, .val = 0x026A4000 },
         { .reg = 0x00009888, .val = 0x046A4000 },
         { .reg = 0x00009888, .val = 0x186C5000 },
         { .reg = 0x00009888, .val = 0x026C4000 },
         { .reg = 0x00009888, .val = 0x046C4000 },
         { .reg = 0x00009888, .val = 0x0E6EC5C3 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x1A6F5000 },
         { .reg = 0x00009888, .val = 0x1E9E000F },
         { .reg = 0x00009888, .val = 0x1AA01540 },
         { .reg = 0x00009888, .val = 0x1EA10F00 },
         { .reg = 0x00009888, .val = 0x0CA9C5C3 },
         { .reg = 0x00009888, .val = 0x10A90000 },
         { .reg = 0x00009888, .val = 0x18AAF000 },
         { .reg = 0x00009888, .val = 0x1AAA0005 },
         { .reg = 0x00009888, .val = 0x18AC0550 },
         { .reg = 0x00009888, .val = 0x0AAEC5C3 },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x1AAF0050 },
         { .reg = 0x00009888, .val = 0x10DE8000 },
         { .reg = 0x00009888, .val = 0x1CDEE000 },
         { .reg = 0x00009888, .val = 0x00E04000 },
         { .reg = 0x00009888, .val = 0x0EE04000 },
         { .reg = 0x00009888, .val = 0x18E04000 },
         { .reg = 0x00009888, .val = 0x1AE00010 },
         { .reg = 0x00009888, .val = 0x10E18000 },
         { .reg = 0x00009888, .val = 0x1EE100E0 },
         { .reg = 0x00009888, .val = 0x08E9C5C3 },
         { .reg = 0x00009888, .val = 0x10E90000 },
         { .reg = 0x00009888, .val = 0x00EAC000 },
         { .reg = 0x00009888, .val = 0x0EEAC000 },
         { .reg = 0x00009888, .val = 0x18EA0500 },
         { .reg = 0x00009888, .val = 0x00EC4000 },
         { .reg = 0x00009888, .val = 0x0EEC4000 },
         { .reg = 0x00009888, .val = 0x18EC0005 },
         { .reg = 0x00009888, .val = 0x00EE00C3 },
         { .reg = 0x00009888, .val = 0x06EEC500 },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x00EF4000 },
         { .reg = 0x00009888, .val = 0x0EEF4000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0E08A000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x02082000 },
         { .reg = 0x00009888, .val = 0x14090050 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x06091000 },
         { .reg = 0x00009888, .val = 0x000D2000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DA000 },
         { .reg = 0x00009888, .val = 0x040E1000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0C0E5000 },
         { .reg = 0x00009888, .val = 0x1812FC00 },
         { .reg = 0x00009888, .val = 0x1A12002B },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x00158000 },
         { .reg = 0x00009888, .val = 0x0E158000 },
         { .reg = 0x00009888, .val = 0x16152AD0 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x0615C000 },
         { .reg = 0x00009888, .val = 0x0815C000 },
         { .reg = 0x00009888, .val = 0x0A15C000 },
         { .reg = 0x00009888, .val = 0x0C15C000 },
         { .reg = 0x00009888, .val = 0x02188000 },
         { .reg = 0x00009888, .val = 0x0418A000 },
         { .reg = 0x00009888, .val = 0x06182000 },
         { .reg = 0x00009888, .val = 0x0A198000 },
         { .reg = 0x00009888, .val = 0x0C19A000 },
         { .reg = 0x00009888, .val = 0x0E192000 },
         { .reg = 0x00009888, .val = 0x0A1CA000 },
         { .reg = 0x00009888, .val = 0x0C1CA000 },
         { .reg = 0x00009888, .val = 0x121D5400 },
         { .reg = 0x00009888, .val = 0x141D0002 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2400 },
         { .reg = 0x00009888, .val = 0x3E1F3000 },
         { .reg = 0x00009888, .val = 0x461F4030 },
         { .reg = 0x00009888, .val = 0x481F0040 },
         { .reg = 0x00009888, .val = 0x4A1F1000 },
         { .reg = 0x00009888, .val = 0x4C1F0010 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F1010 },
         { .reg = 0x00009888, .val = 0x421F3030 },
         { .reg = 0x00009888, .val = 0x441F4040 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000018 },
         { .reg = 0x00002774, .val = 0x0000FFFC },
         { .reg = 0x00002778, .val = 0x00000060 },
         { .reg = 0x0000277C, .val = 0x0000FFF3 },
         { .reg = 0x00002780, .val = 0x00000180 },
         { .reg = 0x00002784, .val = 0x0000FFCF },
         { .reg = 0x00002788, .val = 0x00000600 },
         { .reg = 0x0000278C, .val = 0x0000FF3F },
         { .reg = 0x00002790, .val = 0x00001800 },
         { .reg = 0x00002794, .val = 0x0000FCFF },
         { .reg = 0x00002798, .val = 0x00006000 },
         { .reg = 0x0000279C, .val = 0x0000F3FF },
         { .reg = 0x000027A0, .val = 0x00018000 },
         { .reg = 0x000027A4, .val = 0x0000CFFF },
         { .reg = 0x000027A8, .val = 0x00060000 },
         { .reg = 0x000027AC, .val = 0x00003FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E758, .val = 0x00015014 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
         { .reg = 0x0000E65C, .val = 0x00055054 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__vs_eu_active__read;
      counter->name = "VS EU Active";
      counter->desc = "The percentage of time in which vertex shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__vs_eu_active_per_thread__read;
      counter->name = "VS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuActivePerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__vs_eu_stall__read;
      counter->name = "VS EU Stall";
      counter->desc = "The percentage of time in which vertex shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "VsEuStall";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__vs_eu_stall_per_thread__read;
      counter->name = "VS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which vertex shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "VsEuStallPerThread";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 140;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 148;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_eu_active__read;
      counter->name = "FS EU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuActive";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__ps_eu_active_per_thread__read;
      counter->name = "FS AVG Active per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were processed actively on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuActivePerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_1__ps_eu_stall__read;
      counter->name = "FS EU Stall";
      counter->desc = "The percentage of time in which fragment shaders were stalled on the EUs. Unit: percent.";
      counter->symbol_name = "PsEuStall";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__ps_eu_stall_per_thread__read;
      counter->name = "FS AVG Stall per Thread";
      counter->desc = "The average number of cycles per hardware thread run in which fragment shaders were stalled on the EUs. Unit: cycles.";
      counter->symbol_name = "PsEuStallPerThread";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 272;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 280;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 288;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_1__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 296;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 304;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 308;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 312;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread03_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 316;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread04_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 320;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread05_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 324;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread06_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice6 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread06ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 328;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_1__ps_thread07_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 subslice7 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread07ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 332;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "47e27fcf-9904-4fc0-a661-636dffeafe7d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 44);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14290000 },
         { .reg = 0x00009888, .val = 0x142E0000 },
         { .reg = 0x00009888, .val = 0x14690000 },
         { .reg = 0x00009888, .val = 0x146E0000 },
         { .reg = 0x00009888, .val = 0x14A90000 },
         { .reg = 0x00009888, .val = 0x14AE0000 },
         { .reg = 0x00009888, .val = 0x14E90000 },
         { .reg = 0x00009888, .val = 0x14EE0000 },
         { .reg = 0x00009888, .val = 0x1E1E0018 },
         { .reg = 0x00009888, .val = 0x1A205000 },
         { .reg = 0x00009888, .val = 0x1E211800 },
         { .reg = 0x00009888, .val = 0x0C29C100 },
         { .reg = 0x00009888, .val = 0x10290000 },
         { .reg = 0x00009888, .val = 0x1A2A0034 },
         { .reg = 0x00009888, .val = 0x182C1400 },
         { .reg = 0x00009888, .val = 0x0E2E00C1 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x1A2F1000 },
         { .reg = 0x00009888, .val = 0x1E5E0020 },
         { .reg = 0x00009888, .val = 0x125E8000 },
         { .reg = 0x00009888, .val = 0x1C600001 },
         { .reg = 0x00009888, .val = 0x02604000 },
         { .reg = 0x00009888, .val = 0x1E612000 },
         { .reg = 0x00009888, .val = 0x12618000 },
         { .reg = 0x00009888, .val = 0x0E69C100 },
         { .reg = 0x00009888, .val = 0x10690000 },
         { .reg = 0x00009888, .val = 0x1A6A0040 },
         { .reg = 0x00009888, .val = 0x026AC000 },
         { .reg = 0x00009888, .val = 0x186C4000 },
         { .reg = 0x00009888, .val = 0x026C4000 },
         { .reg = 0x00009888, .val = 0x006EC100 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x026F4000 },
         { .reg = 0x00009888, .val = 0x149E8000 },
         { .reg = 0x00009888, .val = 0x169E8000 },
         { .reg = 0x00009888, .val = 0x04A04000 },
         { .reg = 0x00009888, .val = 0x06A04000 },
         { .reg = 0x00009888, .val = 0x14A18000 },
         { .reg = 0x00009888, .val = 0x16A18000 },
         { .reg = 0x00009888, .val = 0x02A900C1 },
         { .reg = 0x00009888, .val = 0x10A90000 },
         { .reg = 0x00009888, .val = 0x04AA4000 },
         { .reg = 0x00009888, .val = 0x06AAC000 },
         { .reg = 0x00009888, .val = 0x04AC4000 },
         { .reg = 0x00009888, .val = 0x06AC4000 },
         { .reg = 0x00009888, .val = 0x02AEC100 },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x06AF4000 },
         { .reg = 0x00009888, .val = 0x18DE8000 },
         { .reg = 0x00009888, .val = 0x1CDE0800 },
         { .reg = 0x00009888, .val = 0x08E04000 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x18E18000 },
         { .reg = 0x00009888, .val = 0x1EE10008 },
         { .reg = 0x00009888, .val = 0x04E900C1 },
         { .reg = 0x00009888, .val = 0x10E90000 },
         { .reg = 0x00009888, .val = 0x08EA4000 },
         { .reg = 0x00009888, .val = 0x0AEAC000 },
         { .reg = 0x00009888, .val = 0x08EC4000 },
         { .reg = 0x00009888, .val = 0x0AEC4000 },
         { .reg = 0x00009888, .val = 0x04EEC100 },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x0AEF4000 },
         { .reg = 0x00009888, .val = 0x10040150 },
         { .reg = 0x00009888, .val = 0x0C045000 },
         { .reg = 0x00009888, .val = 0x0E040015 },
         { .reg = 0x00009888, .val = 0x14050054 },
         { .reg = 0x00009888, .val = 0x04054000 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x260600E0 },
         { .reg = 0x00009888, .val = 0x24063E00 },
         { .reg = 0x00009888, .val = 0x0E088000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x14090040 },
         { .reg = 0x00009888, .val = 0x04094000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x1A120020 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0612C000 },
         { .reg = 0x00009888, .val = 0x0A138000 },
         { .reg = 0x00009888, .val = 0x0413E000 },
         { .reg = 0x00009888, .val = 0x0E148000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x0214C000 },
         { .reg = 0x00009888, .val = 0x16156000 },
         { .reg = 0x00009888, .val = 0x18150007 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06154000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C188000 },
         { .reg = 0x00009888, .val = 0x0E182000 },
         { .reg = 0x00009888, .val = 0x1419000A },
         { .reg = 0x00009888, .val = 0x021CA000 },
         { .reg = 0x00009888, .val = 0x0A1DA000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x1C1F3061 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x521F4000 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2410 },
         { .reg = 0x00009888, .val = 0x501F0024 },
         { .reg = 0x00009888, .val = 0x4C1F3040 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009888, .val = 0x401F1000 },
         { .reg = 0x00009888, .val = 0x421F4000 },
         { .reg = 0x00009888, .val = 0x441F1730 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_2__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread03_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread04_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread05_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread06_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice6 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice6 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread06ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_2__non_ps_thread07_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 Subslice7 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 subslice7 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread07ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_2__gt_request_queue_full__read;
      counter->name = "SQ is full";
      counter->desc = "The percentage of time when SQ is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueueFull";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_tdl_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TDL_3";
   query->symbol_name = "TDL_3";
   query->guid = "98154f5e-cf17-423c-a096-e696f87c2906";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 51);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe the query once,
    * when it is registered. The query was allocated zeroed with rzalloc()
    * above, so data_size is still 0 here and the setup below is run. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14292C00 },
         { .reg = 0x00009888, .val = 0x16290013 },
         { .reg = 0x00009888, .val = 0x142E2C00 },
         { .reg = 0x00009888, .val = 0x162E0013 },
         { .reg = 0x00009888, .val = 0x14692C00 },
         { .reg = 0x00009888, .val = 0x16690013 },
         { .reg = 0x00009888, .val = 0x146E2C00 },
         { .reg = 0x00009888, .val = 0x166E0013 },
         { .reg = 0x00009888, .val = 0x14A92C00 },
         { .reg = 0x00009888, .val = 0x16A90013 },
         { .reg = 0x00009888, .val = 0x14AE2C00 },
         { .reg = 0x00009888, .val = 0x16AE0013 },
         { .reg = 0x00009888, .val = 0x14E92C00 },
         { .reg = 0x00009888, .val = 0x16E90013 },
         { .reg = 0x00009888, .val = 0x14EE2C00 },
         { .reg = 0x00009888, .val = 0x16EE0013 },
         { .reg = 0x00009888, .val = 0x101E8000 },
         { .reg = 0x00009888, .val = 0x1C1EE000 },
         { .reg = 0x00009888, .val = 0x00204000 },
         { .reg = 0x00009888, .val = 0x0E204000 },
         { .reg = 0x00009888, .val = 0x18204000 },
         { .reg = 0x00009888, .val = 0x1A200010 },
         { .reg = 0x00009888, .val = 0x10218000 },
         { .reg = 0x00009888, .val = 0x1E2100E0 },
         { .reg = 0x00009888, .val = 0x002900C7 },
         { .reg = 0x00009888, .val = 0x0629CF00 },
         { .reg = 0x00009888, .val = 0x10290000 },
         { .reg = 0x00009888, .val = 0x002A4000 },
         { .reg = 0x00009888, .val = 0x0E2A4000 },
         { .reg = 0x00009888, .val = 0x182A0F00 },
         { .reg = 0x00009888, .val = 0x002C4000 },
         { .reg = 0x00009888, .val = 0x0E2C4000 },
         { .reg = 0x00009888, .val = 0x182C0005 },
         { .reg = 0x00009888, .val = 0x082ECFC7 },
         { .reg = 0x00009888, .val = 0x102E0000 },
         { .reg = 0x00009888, .val = 0x1A2F0005 },
         { .reg = 0x00009888, .val = 0x1E5E000F },
         { .reg = 0x00009888, .val = 0x1A601540 },
         { .reg = 0x00009888, .val = 0x1E610F00 },
         { .reg = 0x00009888, .val = 0x0A69CFC7 },
         { .reg = 0x00009888, .val = 0x10690000 },
         { .reg = 0x00009888, .val = 0x186A5000 },
         { .reg = 0x00009888, .val = 0x1A6A000F },
         { .reg = 0x00009888, .val = 0x186C0550 },
         { .reg = 0x00009888, .val = 0x0C6ECFC7 },
         { .reg = 0x00009888, .val = 0x106E0000 },
         { .reg = 0x00009888, .val = 0x1A6F0500 },
         { .reg = 0x00009888, .val = 0x1E9E0030 },
         { .reg = 0x00009888, .val = 0x129E8000 },
         { .reg = 0x00009888, .val = 0x149E8000 },
         { .reg = 0x00009888, .val = 0x1AA04000 },
         { .reg = 0x00009888, .val = 0x1CA00001 },
         { .reg = 0x00009888, .val = 0x02A04000 },
         { .reg = 0x00009888, .val = 0x04A04000 },
         { .reg = 0x00009888, .val = 0x1EA13000 },
         { .reg = 0x00009888, .val = 0x12A18000 },
         { .reg = 0x00009888, .val = 0x14A18000 },
         { .reg = 0x00009888, .val = 0x0EA9CFC7 },
         { .reg = 0x00009888, .val = 0x10A90000 },
         { .reg = 0x00009888, .val = 0x1AAA0050 },
         { .reg = 0x00009888, .val = 0x02AAC000 },
         { .reg = 0x00009888, .val = 0x04AAC000 },
         { .reg = 0x00009888, .val = 0x18AC5000 },
         { .reg = 0x00009888, .val = 0x02AC4000 },
         { .reg = 0x00009888, .val = 0x04AC4000 },
         { .reg = 0x00009888, .val = 0x00AEC700 },
         { .reg = 0x00009888, .val = 0x02AE00CF },
         { .reg = 0x00009888, .val = 0x10AE0000 },
         { .reg = 0x00009888, .val = 0x02AF4000 },
         { .reg = 0x00009888, .val = 0x04AF4000 },
         { .reg = 0x00009888, .val = 0x16DE8000 },
         { .reg = 0x00009888, .val = 0x18DE8000 },
         { .reg = 0x00009888, .val = 0x1CDE1800 },
         { .reg = 0x00009888, .val = 0x06E04000 },
         { .reg = 0x00009888, .val = 0x08E04000 },
         { .reg = 0x00009888, .val = 0x0AE04000 },
         { .reg = 0x00009888, .val = 0x0CE04000 },
         { .reg = 0x00009888, .val = 0x16E18000 },
         { .reg = 0x00009888, .val = 0x18E18000 },
         { .reg = 0x00009888, .val = 0x1EE10018 },
         { .reg = 0x00009888, .val = 0x02E9C700 },
         { .reg = 0x00009888, .val = 0x04E900CF },
         { .reg = 0x00009888, .val = 0x10E90000 },
         { .reg = 0x00009888, .val = 0x06EA4000 },
         { .reg = 0x00009888, .val = 0x08EA4000 },
         { .reg = 0x00009888, .val = 0x0AEAC000 },
         { .reg = 0x00009888, .val = 0x0CEAC000 },
         { .reg = 0x00009888, .val = 0x06EC4000 },
         { .reg = 0x00009888, .val = 0x08EC4000 },
         { .reg = 0x00009888, .val = 0x0AEC4000 },
         { .reg = 0x00009888, .val = 0x0CEC4000 },
         { .reg = 0x00009888, .val = 0x04EEC700 },
         { .reg = 0x00009888, .val = 0x06EE00CF },
         { .reg = 0x00009888, .val = 0x10EE0000 },
         { .reg = 0x00009888, .val = 0x0AEF4000 },
         { .reg = 0x00009888, .val = 0x0CEF4000 },
         { .reg = 0x00009888, .val = 0x0C045400 },
         { .reg = 0x00009888, .val = 0x0E045555 },
         { .reg = 0x00009888, .val = 0x10040155 },
         { .reg = 0x00009888, .val = 0x04055000 },
         { .reg = 0x00009888, .val = 0x0A055000 },
         { .reg = 0x00009888, .val = 0x0C055000 },
         { .reg = 0x00009888, .val = 0x0E055000 },
         { .reg = 0x00009888, .val = 0x14050055 },
         { .reg = 0x00009888, .val = 0x06055000 },
         { .reg = 0x00009888, .val = 0x08055000 },
         { .reg = 0x00009888, .val = 0x2406FF00 },
         { .reg = 0x00009888, .val = 0x260600FF },
         { .reg = 0x00009888, .val = 0x0A08A000 },
         { .reg = 0x00009888, .val = 0x0C08A000 },
         { .reg = 0x00009888, .val = 0x0E095000 },
         { .reg = 0x00009888, .val = 0x14090005 },
         { .reg = 0x00009888, .val = 0x020D8000 },
         { .reg = 0x00009888, .val = 0x040DA000 },
         { .reg = 0x00009888, .val = 0x060D2000 },
         { .reg = 0x00009888, .val = 0x060E4000 },
         { .reg = 0x00009888, .val = 0x080E5000 },
         { .reg = 0x00009888, .val = 0x0A0E1000 },
         { .reg = 0x00009888, .val = 0x1812A800 },
         { .reg = 0x00009888, .val = 0x1A12003E },
         { .reg = 0x00009888, .val = 0x0212C000 },
         { .reg = 0x00009888, .val = 0x0412C000 },
         { .reg = 0x00009888, .val = 0x0813C000 },
         { .reg = 0x00009888, .val = 0x0A13F000 },
         { .reg = 0x00009888, .val = 0x04136000 },
         { .reg = 0x00009888, .val = 0x0A14C000 },
         { .reg = 0x00009888, .val = 0x0C14C000 },
         { .reg = 0x00009888, .val = 0x0E14C000 },
         { .reg = 0x00009888, .val = 0x00148000 },
         { .reg = 0x00009888, .val = 0x02144000 },
         { .reg = 0x00009888, .val = 0x0015C000 },
         { .reg = 0x00009888, .val = 0x0E15C000 },
         { .reg = 0x00009888, .val = 0x16152AF8 },
         { .reg = 0x00009888, .val = 0x18150005 },
         { .reg = 0x00009888, .val = 0x02154000 },
         { .reg = 0x00009888, .val = 0x04154000 },
         { .reg = 0x00009888, .val = 0x06158000 },
         { .reg = 0x00009888, .val = 0x08158000 },
         { .reg = 0x00009888, .val = 0x0A158000 },
         { .reg = 0x00009888, .val = 0x0C158000 },
         { .reg = 0x00009888, .val = 0x00182000 },
         { .reg = 0x00009888, .val = 0x06188000 },
         { .reg = 0x00009888, .val = 0x0818A000 },
         { .reg = 0x00009888, .val = 0x08192000 },
         { .reg = 0x00009888, .val = 0x0E198000 },
         { .reg = 0x00009888, .val = 0x12190140 },
         { .reg = 0x00009888, .val = 0x0E1CA000 },
         { .reg = 0x00009888, .val = 0x001C8000 },
         { .reg = 0x00009888, .val = 0x021C2000 },
         { .reg = 0x00009888, .val = 0x141D0028 },
         { .reg = 0x00009888, .val = 0x081D8000 },
         { .reg = 0x00009888, .val = 0x0A1D2000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x301F8000 },
         { .reg = 0x00009888, .val = 0x501F4924 },
         { .reg = 0x00009888, .val = 0x521F4924 },
         { .reg = 0x00009888, .val = 0x541F0024 },
         { .reg = 0x00009888, .val = 0x3A1F8000 },
         { .reg = 0x00009888, .val = 0x4E1F2410 },
         { .reg = 0x00009888, .val = 0x3E1F4000 },
         { .reg = 0x00009888, .val = 0x461F3040 },
         { .reg = 0x00009888, .val = 0x481F1030 },
         { .reg = 0x00009888, .val = 0x4A1F0010 },
         { .reg = 0x00009888, .val = 0x4C1F1000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F0000 },
         { .reg = 0x00009888, .val = 0x421F4040 },
         { .reg = 0x00009888, .val = 0x441F3030 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x00800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00005004 },
         { .reg = 0x0000E558, .val = 0x00010003 },
         { .reg = 0x0000E658, .val = 0x00012011 },
         { .reg = 0x0000E45C, .val = 0x00051050 },
         { .reg = 0x0000E55C, .val = 0x00053052 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__tdl_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__eu_fpu_both_active__read;
      counter->name = "EU Both FPU Pipes Active";
      counter->desc = "The percentage of time in which both EU FPU pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuBothActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__vs_fpu0_active__read;
      counter->name = "VS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu0Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__vs_fpu1_active__read;
      counter->name = "VS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpu1Active";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__ps_fpu0_active__read;
      counter->name = "PS FPU0 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU0 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu0Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__ps_fpu1_active__read;
      counter->name = "PS FPU1 Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpu1Active";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__tdl_3__ps_eu_both_fpu_active__read;
      counter->name = "FS Both FPU Active";
      counter->desc = "The percentage of time in which fragment shaders were processed actively on the both FPUs. Unit: percent.";
      counter->symbol_name = "PsEuBothFpuActive";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__tdl_3__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 232;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 240;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 244;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 248;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 252;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 256;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice2 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 260;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header03_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 264;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header03_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice3 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice3 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 268;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header04_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 272;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header04_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice4 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice4 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 276;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header05_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 280;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header05_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice5 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice5 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 284;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header06_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader06ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 288;
      }

      if (perf->sys_vars.subslice_mask & 64) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header06_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice6 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice6 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader06ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 292;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header07_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader07ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 296;
      }

      if (perf->sys_vars.subslice_mask & 128) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = ehl__tdl_3__thread_header07_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 Subslice7 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 subslice7 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader07ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 300;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Gpu Rings Busyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "85254749-e937-4205-9b40-5df8847c9ee8";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 10);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x12021200 },
         { .reg = 0x00009888, .val = 0x14040120 },
         { .reg = 0x00009888, .val = 0x10002400 },
         { .reg = 0x00009888, .val = 0x02020083 },
         { .reg = 0x00009888, .val = 0x10020000 },
         { .reg = 0x00009888, .val = 0x00020000 },
         { .reg = 0x00009888, .val = 0x06040043 },
         { .reg = 0x00009888, .val = 0x10040000 },
         { .reg = 0x00009888, .val = 0x1C040004 },
         { .reg = 0x00009888, .val = 0x02000023 },
         { .reg = 0x00009888, .val = 0x22000000 },
         { .reg = 0x00009888, .val = 0x14000000 },
         { .reg = 0x00009888, .val = 0x18008000 },
         { .reg = 0x00009888, .val = 0x04194000 },
         { .reg = 0x00009888, .val = 0x081A8000 },
         { .reg = 0x00009888, .val = 0x3A1F6000 },
         { .reg = 0x00009888, .val = 0x4E1F2900 },
         { .reg = 0x00009888, .val = 0x501F00E9 },
         { .reg = 0x00009888, .val = 0x04218000 },
         { .reg = 0x00009888, .val = 0x06212000 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x401F0160 },
         { .reg = 0x00009888, .val = 0x421F0050 },
         { .reg = 0x00009888, .val = 0x441F0100 },
         { .reg = 0x00009888, .val = 0x301F0000 },
         { .reg = 0x00009884, .val = 0x00000002 },
         { .reg = 0x00009888, .val = 0x181B2400 },
         { .reg = 0x00009888, .val = 0x185B2400 },
         { .reg = 0x00009888, .val = 0x14231200 },
         { .reg = 0x00009888, .val = 0x06218000 },
         { .reg = 0x00009888, .val = 0x08212000 },
         { .reg = 0x00009888, .val = 0x080D8000 },
         { .reg = 0x00009888, .val = 0x040F4000 },
         { .reg = 0x00009888, .val = 0x0828C000 },
         { .reg = 0x00009888, .val = 0x041B8300 },
         { .reg = 0x00009888, .val = 0x221B0000 },
         { .reg = 0x00009888, .val = 0x1A1B0000 },
         { .reg = 0x00009888, .val = 0x22162000 },
         { .reg = 0x00009888, .val = 0x084D4000 },
         { .reg = 0x00009888, .val = 0x044F1000 },
         { .reg = 0x00009888, .val = 0x08683000 },
         { .reg = 0x00009888, .val = 0x045B0083 },
         { .reg = 0x00009888, .val = 0x225B0000 },
         { .reg = 0x00009888, .val = 0x22560800 },
         { .reg = 0x00009888, .val = 0x06230093 },
         { .reg = 0x00009888, .val = 0x20230000 },
         { .reg = 0x00009888, .val = 0x0A238000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0x10800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0x00800000 },
         { .reg = 0x00002770, .val = 0x0007C000 },
         { .reg = 0x00002774, .val = 0x000007FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 44;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = ehl__gpu_busyness__any_ring_busy__read;
      counter->name = "AnyRingBusy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyRingBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 48;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
ehl_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TestOa";
   query->symbol_name = "TestOa";
   query->guid = "4cbcfaf7-3c0c-42b8-a324-b1e58329c732";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 12);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x18130000 },
         { .reg = 0x00009888, .val = 0x22000004 },
         { .reg = 0x00009888, .val = 0x0E130076 },
         { .reg = 0x00009888, .val = 0x10130000 },
         { .reg = 0x00009888, .val = 0x1E130000 },
         { .reg = 0x00009888, .val = 0x0E164000 },
         { .reg = 0x00009888, .val = 0x1A1A0004 },
         { .reg = 0x00009888, .val = 0x541F0001 },
         { .reg = 0x00009888, .val = 0x141F0000 },
         { .reg = 0x00009888, .val = 0x4C1F0000 },
         { .reg = 0x00009888, .val = 0x301F0000 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x00002740, .val = 0x00000000 },
         { .reg = 0x00002710, .val = 0x00000000 },
         { .reg = 0x00002714, .val = 0xF0800000 },
         { .reg = 0x00002720, .val = 0x00000000 },
         { .reg = 0x00002724, .val = 0xF0800000 },
         { .reg = 0x00002770, .val = 0x00000004 },
         { .reg = 0x00002774, .val = 0x0000FFFF },
         { .reg = 0x00002778, .val = 0x00000003 },
         { .reg = 0x0000277C, .val = 0x0000FFFF },
         { .reg = 0x00002780, .val = 0x00000007 },
         { .reg = 0x00002784, .val = 0x0000FFFF },
         { .reg = 0x00002788, .val = 0x00100002 },
         { .reg = 0x0000278C, .val = 0x0000FFF7 },
         { .reg = 0x00002790, .val = 0x00100002 },
         { .reg = 0x00002794, .val = 0x0000FFCF },
         { .reg = 0x00002798, .val = 0x00100082 },
         { .reg = 0x0000279C, .val = 0x0000FFEF },
         { .reg = 0x000027A0, .val = 0x001000C2 },
         { .reg = 0x000027A4, .val = 0x0000FFE7 },
         { .reg = 0x000027A8, .val = 0x00100001 },
         { .reg = 0x000027AC, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = ehl__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = ehl__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1. Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_ehl(struct intel_perf_config *perf)
{
   ehl_register_render_basic_counter_query(perf);
   ehl_register_compute_basic_counter_query(perf);
   ehl_register_compute_extended_counter_query(perf);
   ehl_register_compute_l3_cache_counter_query(perf);
   ehl_register_render_pipe_profile_counter_query(perf);
   ehl_register_hdc_and_sf_counter_query(perf);
   ehl_register_rasterizer_and_pixel_backend_counter_query(perf);
   ehl_register_l3_1_counter_query(perf);
   ehl_register_l3_2_counter_query(perf);
   ehl_register_l3_3_counter_query(perf);
   ehl_register_l3_4_counter_query(perf);
   ehl_register_l3_5_counter_query(perf);
   ehl_register_sampler_1_counter_query(perf);
   ehl_register_sampler_2_counter_query(perf);
   ehl_register_tdl_1_counter_query(perf);
   ehl_register_tdl_2_counter_query(perf);
   ehl_register_tdl_3_counter_query(perf);
   ehl_register_gpu_busyness_counter_query(perf);
   ehl_register_test_oa_counter_query(perf);
}


static void
tglgt1_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "c17af13d-3953-432b-9bd1-81346b4c2092";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 34);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14150001 },
         { .reg = 0x00009888, .val = 0x16150020 },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x10124000 },
         { .reg = 0x00009888, .val = 0x12124000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C130E00 },
         { .reg = 0x00009888, .val = 0x00150050 },
         { .reg = 0x00009888, .val = 0x06157000 },
         { .reg = 0x00009888, .val = 0x08157151 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x18150000 },
         { .reg = 0x00009888, .val = 0x1C150000 },
         { .reg = 0x00009888, .val = 0x0000C000 },
         { .reg = 0x00009888, .val = 0x0E00C000 },
         { .reg = 0x00009888, .val = 0x1000C000 },
         { .reg = 0x00009888, .val = 0x1200C000 },
         { .reg = 0x00009888, .val = 0x10058000 },
         { .reg = 0x00009888, .val = 0x1C058000 },
         { .reg = 0x00009888, .val = 0x22050030 },
         { .reg = 0x00009888, .val = 0x040A4000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0AC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D47 },
         { .reg = 0x00009888, .val = 0x09151536 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B1050BB },
         { .reg = 0x00009888, .val = 0x5D102C01 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x61110001 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x1F150137 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x0F168000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x1D350137 },
         { .reg = 0x00009888, .val = 0x03350147 },
         { .reg = 0x00009888, .val = 0x07350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x0F364000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101710 },
         { .reg = 0x00009888, .val = 0x57100007 },
         { .reg = 0x00009888, .val = 0x49101717 },
         { .reg = 0x00009888, .val = 0x4B100717 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler Slice0 Dualsubslice0 is bottleneck";
         counter->desc = "The percentage of time when sampler slice0 dualsubslice0 is bottleneck Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "Compute Metrics Basic set" OA query (symbol name
 * "ComputeBasic") with the given perf configuration.
 *
 * The function allocates a new intel_perf_query_info using perf as the
 * ralloc context, sets its OA report format and accumulator offsets,
 * attaches the mux / B-counter / flex register programming tables,
 * describes 30 counters, and finally inserts the query into
 * perf->oa_metrics_table keyed by the query's GUID string.
 *
 * perf: configuration that receives the query in its oa_metrics_table.
 *
 * NOTE(review): neither the rzalloc() nor the rzalloc_array() result is
 * checked before use -- presumably allocation failure is treated as fatal
 * elsewhere; confirm.
 */
static void
tglgt1_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "79a0514e-40d7-437a-90cf-33e02857adc6";
   /* Room for exactly the 30 counter descriptors filled in below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets...
    *
    * Each offset is derived from the previous one, giving:
    *   gpu_time = 0, gpu_clock = 1, a = 2, b = 38, c = 46,
    *   perfcnt = 54, rpstat = 56.
    * The spans (36, 8, 8) presumably correspond to the 32 + 4 A counters,
    * 8 B counters and 8 C counters named in the OA format above.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): query is allocated with rzalloc() at the top of this
    * function, so data_size is expected to still be 0 here and this branch
    * to be taken on every call; the "global variable" wording above looks
    * like a leftover from an older generator -- confirm.
    */

   if (!query->data_size) {
      /* Register/value pairs for the mux configuration.  The table has
       * static storage duration, so the pointer stored in query->config
       * stays valid after this function returns.  The individual values
       * are opaque hardware programming data emitted by the generator.
       */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs for the B-counter configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs for the flex configuration. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors.  Each one takes the next free slot in
       * query->counters (bumping n_counters) and records its read callback,
       * user-visible strings, type/data type/units, maximum raw value and
       * the offset of its value in the query result data.  Offsets advance
       * by 8 between UINT64 counters and by 4 between consecutive FLOAT
       * counters; a FLOAT counter followed by a UINT64 one is followed by a
       * gap so that the UINT64 offset stays a multiple of 8.
       */

      /* --- "GPU" category --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from perf rather than being a literal.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* FLOAT percentage counter: uses the float read callback. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* --- "EU Array/..." per-shader-stage thread dispatch counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* --- "EU Array" FLOAT percentage counters (offsets 80, 84, 88) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* --- "3D Pipe/..." pixel and sample counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      /* --- "Sampler/..." texel counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      /* --- "L3/Data Port/..." memory traffic counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      /* --- "EU Array/Barrier" --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      /* --- "GTI" throughput counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      /* counter still points at the last descriptor (offset 224), so the
       * total result size is that offset plus the size of that counter.
       * This relies on the last-registered counter also having the highest
       * offset, which holds for the list above.
       */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query so it can be looked up by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "619eabea-0191-4d75-aaca-4217837215aa";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x00123E00 },
         { .reg = 0x00009888, .val = 0x060B00B3 },
         { .reg = 0x00009888, .val = 0x140B3C00 },
         { .reg = 0x00009888, .val = 0x1C0B0000 },
         { .reg = 0x00009888, .val = 0x120C8320 },
         { .reg = 0x00009888, .val = 0x040D7E00 },
         { .reg = 0x00009888, .val = 0x280D0000 },
         { .reg = 0x00009888, .val = 0x2C0E001F },
         { .reg = 0x00009888, .val = 0x10087C00 },
         { .reg = 0x00009888, .val = 0x1E120002 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x1E130002 },
         { .reg = 0x00009888, .val = 0x0E0B0031 },
         { .reg = 0x00009888, .val = 0x180B0092 },
         { .reg = 0x00009888, .val = 0x1A0B00B1 },
         { .reg = 0x00009888, .val = 0x020B0093 },
         { .reg = 0x00009888, .val = 0x040B0033 },
         { .reg = 0x00009888, .val = 0x000B0000 },
         { .reg = 0x00009888, .val = 0x0A0C0022 },
         { .reg = 0x00009888, .val = 0x1E0C00C2 },
         { .reg = 0x00009888, .val = 0x140C8000 },
         { .reg = 0x00009888, .val = 0x160C8000 },
         { .reg = 0x00009888, .val = 0x100D0017 },
         { .reg = 0x00009888, .val = 0x160D0013 },
         { .reg = 0x00009888, .val = 0x1C0D0081 },
         { .reg = 0x00009888, .val = 0x080D0082 },
         { .reg = 0x00009888, .val = 0x0A0D0102 },
         { .reg = 0x00009888, .val = 0x200D0000 },
         { .reg = 0x00009888, .val = 0x0E0D4000 },
         { .reg = 0x00009888, .val = 0x140D4000 },
         { .reg = 0x00009888, .val = 0x180D4000 },
         { .reg = 0x00009888, .val = 0x1A0D4000 },
         { .reg = 0x00009888, .val = 0x020D4000 },
         { .reg = 0x00009888, .val = 0x060D4000 },
         { .reg = 0x00009888, .val = 0x0C0E0225 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x100E8000 },
         { .reg = 0x00009888, .val = 0x140E8000 },
         { .reg = 0x00009888, .val = 0x160E8000 },
         { .reg = 0x00009888, .val = 0x180E8000 },
         { .reg = 0x00009888, .val = 0x1A0E8000 },
         { .reg = 0x00009888, .val = 0x1C0E8000 },
         { .reg = 0x00009888, .val = 0x020E8000 },
         { .reg = 0x00009888, .val = 0x040E8000 },
         { .reg = 0x00009888, .val = 0x060E8000 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x0E0F4000 },
         { .reg = 0x00009888, .val = 0x100F4000 },
         { .reg = 0x00009888, .val = 0x140F4000 },
         { .reg = 0x00009888, .val = 0x160F4000 },
         { .reg = 0x00009888, .val = 0x180F4000 },
         { .reg = 0x00009888, .val = 0x1A0F4000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x020F4000 },
         { .reg = 0x00009888, .val = 0x040F4000 },
         { .reg = 0x00009888, .val = 0x060F4000 },
         { .reg = 0x00009888, .val = 0x080F4000 },
         { .reg = 0x00009888, .val = 0x0A0F4000 },
         { .reg = 0x00009888, .val = 0x0C0F4000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12008000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x0601C000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A01C000 },
         { .reg = 0x00009888, .val = 0x1801E000 },
         { .reg = 0x00009888, .val = 0x00018000 },
         { .reg = 0x00009888, .val = 0x0201C000 },
         { .reg = 0x00009888, .val = 0x0401C000 },
         { .reg = 0x00009888, .val = 0x22050800 },
         { .reg = 0x00009888, .val = 0x120A8000 },
         { .reg = 0x00009888, .val = 0x08081000 },
         { .reg = 0x00009888, .val = 0x16080000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x4D100604 },
         { .reg = 0x00009888, .val = 0x4F100400 },
         { .reg = 0x00009888, .val = 0x5110020A },
         { .reg = 0x00009888, .val = 0x53100004 },
         { .reg = 0x00009888, .val = 0x55100400 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100004 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100400 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "HDCAndSF" OA metric set for tglgt1.
 *
 * A query description is allocated with perf as its ralloc parent, filled
 * with the mux / B-counter / flex register programming and with up to 31
 * counter descriptions, and finally inserted into perf->oa_metrics_table
 * under the query GUID.
 */
static void
tglgt1_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->guid = "b1560378-cb32-4d4b-af30-ffeb163655e6";
   query->symbol_name = "HDCAndSF";
   query->name = "Metric set HDCAndSF";
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Storage for every counter this set can expose; n_counters counts the
    * ones that actually get registered below. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 31);

   /* Accumulation buffer offsets: each section is placed relative to the
    * one before it. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* data_size is only assigned at the very end of the block below, so a
    * zero value means the description has not been populated yet.
    * NOTE(review): presumably always true here because rzalloc hands back
    * zeroed memory - confirm against ralloc. */
   if (query->data_size == 0) {
      /* Register programming tables: static so the pointers stored in
       * query->config stay valid after this function returns. */
      static const struct intel_perf_query_register_prog hdc_sf_mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14112400 },
         { .reg = 0x00009888, .val = 0x14312400 },
         { .reg = 0x00009888, .val = 0x240A0019 },
         { .reg = 0x00009888, .val = 0x00110074 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00128000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x0E310074 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x06310000 },
         { .reg = 0x00009888, .val = 0x0E328000 },
         { .reg = 0x00009888, .val = 0x1C330200 },
         { .reg = 0x00009888, .val = 0x0E0D8000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x0E0F4000 },
         { .reg = 0x00009888, .val = 0x0000C000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x1000C000 },
         { .reg = 0x00009888, .val = 0x06018000 },
         { .reg = 0x00009888, .val = 0x10058000 },
         { .reg = 0x00009888, .val = 0x100A00F7 },
         { .reg = 0x00009888, .val = 0x140A0000 },
         { .reg = 0x00009888, .val = 0x040A4000 },
         { .reg = 0x00009888, .val = 0x0C0A0000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B146000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x47100400 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100110 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      static const struct intel_perf_query_register_prog hdc_sf_b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x70800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00070000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
      };
      static const struct intel_perf_query_register_prog hdc_sf_flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };

      query->config.n_mux_regs = ARRAY_SIZE(hdc_sf_mux_regs);
      query->config.mux_regs = hdc_sf_mux_regs;

      query->config.n_b_counter_regs = ARRAY_SIZE(hdc_sf_b_counter_regs);
      query->config.b_counter_regs = hdc_sf_b_counter_regs;

      query->config.n_flex_regs = ARRAY_SIZE(hdc_sf_flex_regs);
      query->config.flex_regs = hdc_sf_flex_regs;

      /* Counter descriptions. Each one claims the next free slot of
       * query->counters, so the registration order below is the order in
       * which the counters end up in the array. */

      /* GpuTime */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__gpu_time__read;

      /* GpuCoreClocks */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here whose maximum is
       * obtained from a helper instead of a literal. */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = tglgt1__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->category = "GPU";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = tglgt1__hdc_and_sf__gpu_busy__read;

      /* VsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->category = "EU Array/Vertex Shader";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__vs_threads__read;

      /* HsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->category = "EU Array/Hull Shader";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__hs_threads__read;

      /* DsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->category = "EU Array/Domain Shader";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__ds_threads__read;

      /* GsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->category = "EU Array/Geometry Shader";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__gs_threads__read;

      /* PsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->category = "EU Array/Fragment Shader";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__ps_threads__read;

      /* CsThreads */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->category = "EU Array/Compute Shader";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__cs_threads__read;

      /* EuActive */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->category = "EU Array";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = tglgt1__hdc_and_sf__eu_active__read;

      /* EuStall */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->category = "EU Array";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = tglgt1__hdc_and_sf__eu_stall__read;

      /* EuThreadOccupancy */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->category = "EU Array";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = tglgt1__hdc_and_sf__eu_thread_occupancy__read;

      /* RasterizedPixels */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Rasterized Pixels";
      ctr->symbol_name = "RasterizedPixels";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 96;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__rasterized_pixels__read;

      /* HiDepthTestFails */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->symbol_name = "HiDepthTestFails";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 104;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__hi_depth_test_fails__read;

      /* EarlyDepthTestFails */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Depth Test Fails";
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 112;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__early_depth_test_fails__read;

      /* SamplesKilledInPs */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Killed in FS";
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__samples_killed_in_ps__read;

      /* PixelsFailingPostPsTests */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Pixels Failing Tests";
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->category = "3D Pipe/Output Merger";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__pixels_failing_post_ps_tests__read;

      /* SamplesWritten */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Written";
      ctr->symbol_name = "SamplesWritten";
      ctr->category = "3D Pipe/Output Merger";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 136;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__samples_written__read;

      /* SamplesBlended */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Blended";
      ctr->symbol_name = "SamplesBlended";
      ctr->category = "3D Pipe/Output Merger";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 144;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__samples_blended__read;

      /* SamplerTexels */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels";
      ctr->symbol_name = "SamplerTexels";
      ctr->category = "Sampler/Sampler Input";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 152;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__sampler_texels__read;

      /* SamplerTexelMisses */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels Misses";
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->category = "Sampler/Sampler Cache";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 160;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__sampler_texel_misses__read;

      /* SlmBytesRead */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Read";
      ctr->symbol_name = "SlmBytesRead";
      ctr->category = "L3/Data Port/SLM";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 168;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__slm_bytes_read__read;

      /* SlmBytesWritten */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Written";
      ctr->symbol_name = "SlmBytesWritten";
      ctr->category = "L3/Data Port/SLM";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 176;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__slm_bytes_written__read;

      /* ShaderMemoryAccesses */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Memory Accesses";
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->category = "L3/Data Port";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 184;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__shader_memory_accesses__read;

      /* ShaderAtomics */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->symbol_name = "ShaderAtomics";
      ctr->category = "L3/Data Port/Atomics";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 192;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__shader_atomics__read;

      /* L3ShaderThroughput */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "L3 Shader Throughput";
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->category = "L3/Data Port";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 200;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__l3_shader_throughput__read;

      /* ShaderBarriers */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Barrier Messages";
      ctr->symbol_name = "ShaderBarriers";
      ctr->category = "EU Array/Barrier";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 208;
      ctr->oa_counter_read_uint64 = tglgt1__hdc_and_sf__shader_barriers__read;

      /* The next two counters are registered only when the matching bit
       * of the subslice mask is set. Their result offsets are fixed, so
       * skipping one does not move the counters that follow. */

      /* NonSamplerShader00AccessStalledOnL3 */
      if (perf->sys_vars.subslice_mask & 0x1) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
         ctr->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         ctr->category = "GPU/Data Port";
         ctr->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->raw_max = 100.0;
         ctr->offset = 216;
         ctr->oa_counter_read_float = tglgt1__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
      }

      /* NonSamplerShader01AccessStalledOnL3 */
      if (perf->sys_vars.subslice_mask & 0x2) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
         ctr->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         ctr->category = "GPU/Data Port";
         ctr->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->raw_max = 100.0;
         ctr->offset = 220;
         ctr->oa_counter_read_float = tglgt1__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
      }

      /* PolyDataReady: always registered, and must stay last because the
       * total result size below is derived from it. */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "Polygon Data Ready";
      ctr->symbol_name = "PolyDataReady";
      ctr->category = "GPU/3D Pipe/Strip-Fans";
      ctr->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->raw_max = 100.0;
      ctr->offset = 224;
      ctr->oa_counter_read_float = tglgt1__hdc_and_sf__poly_data_ready__read;

      /* Result size = end of the last registered counter. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set, keyed by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the TGL GT1 "RasterizerAndPixelBackend" OA metric set.
 *
 * Creates a new intel_perf_query_info, attaches the MUX / B-counter / flex
 * register programming for the set, describes every exposed counter (names,
 * category, type, units, offset and read callback) and finally inserts the
 * query into perf->oa_metrics_table keyed by its GUID.
 *
 * The two "Slice0 Rasterizer ..." counters are only appended when bit 0 of
 * perf->sys_vars.slice_mask is set; all other counters are unconditional.
 *
 * \param perf  perf configuration the query is allocated from and
 *              registered with.
 */
static void
tglgt1_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables for this metric set.  The values are
    * opaque hardware configuration emitted by the generator. */
   static const struct intel_perf_query_register_prog mux_config[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x1E055000 },
      { .reg = 0x00009888, .val = 0x1A0500C0 },
      { .reg = 0x00009888, .val = 0x2A0A7300 },
      { .reg = 0x00009888, .val = 0x2C0A0000 },
      { .reg = 0x00009888, .val = 0x120800A0 },
      { .reg = 0x00009888, .val = 0x0000C000 },
      { .reg = 0x00009888, .val = 0x0E00C000 },
      { .reg = 0x00009888, .val = 0x1000C000 },
      { .reg = 0x00009888, .val = 0x12008000 },
      { .reg = 0x00009888, .val = 0x34000080 },
      { .reg = 0x00009888, .val = 0x0800C000 },
      { .reg = 0x00009888, .val = 0x0A00C000 },
      { .reg = 0x00009888, .val = 0x0C00C000 },
      { .reg = 0x00009888, .val = 0x04052700 },
      { .reg = 0x00009888, .val = 0x060500C0 },
      { .reg = 0x00009888, .val = 0x22050000 },
      { .reg = 0x00009888, .val = 0x1C050000 },
      { .reg = 0x00009888, .val = 0x000A0144 },
      { .reg = 0x00009888, .val = 0x0E0A0145 },
      { .reg = 0x00009888, .val = 0x100A0156 },
      { .reg = 0x00009888, .val = 0x080A814F },
      { .reg = 0x00009888, .val = 0x140A0000 },
      { .reg = 0x00009888, .val = 0x040A0000 },
      { .reg = 0x00009888, .val = 0x0A0A4000 },
      { .reg = 0x00009888, .val = 0x0C0A0000 },
      { .reg = 0x00009888, .val = 0x08081980 },
      { .reg = 0x00009888, .val = 0x0A080032 },
      { .reg = 0x00009888, .val = 0x10080000 },
      { .reg = 0x00009888, .val = 0x16080000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x31152800 },
      { .reg = 0x00009888, .val = 0x331500A0 },
      { .reg = 0x00009888, .val = 0x31352800 },
      { .reg = 0x00009888, .val = 0x333500A0 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B10556B },
      { .reg = 0x00009888, .val = 0x5D103005 },
      { .reg = 0x00009888, .val = 0x11148000 },
      { .reg = 0x00009888, .val = 0x1B14FC00 },
      { .reg = 0x00009888, .val = 0x1D140001 },
      { .reg = 0x00009888, .val = 0x4B112000 },
      { .reg = 0x00009888, .val = 0x5F115540 },
      { .reg = 0x00009888, .val = 0x61110005 },
      { .reg = 0x00009888, .val = 0x01128000 },
      { .reg = 0x00009888, .val = 0x0F128000 },
      { .reg = 0x00009888, .val = 0x11128000 },
      { .reg = 0x00009888, .val = 0x13128000 },
      { .reg = 0x00009888, .val = 0x15128000 },
      { .reg = 0x00009888, .val = 0x09128000 },
      { .reg = 0x00009888, .val = 0x0B128000 },
      { .reg = 0x00009888, .val = 0x0D128000 },
      { .reg = 0x00009888, .val = 0x05150096 },
      { .reg = 0x00009888, .val = 0x07150016 },
      { .reg = 0x00009888, .val = 0x01150000 },
      { .reg = 0x00009888, .val = 0x0316C000 },
      { .reg = 0x00009888, .val = 0x1F350096 },
      { .reg = 0x00009888, .val = 0x03350016 },
      { .reg = 0x00009888, .val = 0x01350000 },
      { .reg = 0x00009888, .val = 0x0F368000 },
      { .reg = 0x00009888, .val = 0x01368000 },
      { .reg = 0x00009888, .val = 0x47101200 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x4F100202 },
      { .reg = 0x00009888, .val = 0x51100202 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009888, .val = 0x55101000 },
      { .reg = 0x00009888, .val = 0x57100001 },
      { .reg = 0x00009888, .val = 0x49101111 },
      { .reg = 0x00009888, .val = 0x4B100201 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_config[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x30800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00030000 },
      { .reg = 0x0000D940, .val = 0x00000038 },
      { .reg = 0x0000D944, .val = 0x0000FFF8 },
      { .reg = 0x0000DC00, .val = 0x00000038 },
      { .reg = 0x0000DC04, .val = 0x0000FFF8 },
      { .reg = 0x0000D948, .val = 0x000000C0 },
      { .reg = 0x0000D94C, .val = 0x0000FFE7 },
      { .reg = 0x0000DC08, .val = 0x000000C0 },
      { .reg = 0x0000DC0C, .val = 0x0000FFE7 },
   };

   static const struct intel_perf_query_register_prog flex_config[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->guid = "36e7fc09-3fb7-4b46-a6ff-ba1ce767db7c";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->name = "Metric set RasterizerAndPixelBackend";
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->perf = perf;

   /* Room for every counter this set can expose (37 at most). */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 37);

   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Accumulation buffer layout: each section starts right after the
    * previous one (1 time slot, 1 clock slot, 36 A, 8 B, 8 C, 2 perfcnt). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* NOTE(review): the query was just rzalloc'd, so data_size is presumably
    * always zero here and this block always runs -- confirm against ralloc. */
   if (query->data_size == 0) {
      struct intel_perf_query_counter *ctr;

      query->config.n_mux_regs = ARRAY_SIZE(mux_config);
      query->config.mux_regs = mux_config;

      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_config);
      query->config.b_counter_regs = b_counter_config;

      query->config.n_flex_regs = ARRAY_SIZE(flex_config);
      query->config.flex_regs = flex_config;

      /* ---- General GPU counters ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__gpu_time__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__gpu_core_clocks__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      /* The only counter in this set whose maximum is computed from perf. */
      ctr->raw_max = tglgt1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__gpu_busy__read;

      /* ---- Per-stage thread dispatch counts ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__vs_threads__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__hs_threads__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__ds_threads__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__gs_threads__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__ps_threads__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__cs_threads__read;

      /* ---- EU array utilisation (percentages) ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__eu_active__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__eu_stall__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__eu_thread_occupancy__read;

      /* ---- 3D pipe pixel / sample counts ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "RasterizedPixels";
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 96;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__rasterized_pixels__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "HiDepthTestFails";
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 104;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__hi_depth_test_fails__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 112;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__early_depth_test_fails__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__samples_killed_in_ps__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SamplesWritten";
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 136;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__samples_written__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SamplesBlended";
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->category = "3D Pipe/Output Merger";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 144;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__samples_blended__read;

      /* ---- Sampler counters ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SamplerTexels";
      ctr->name = "Sampler Texels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->category = "Sampler/Sampler Input";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 152;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__sampler_texels__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->name = "Sampler Texels Misses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->category = "Sampler/Sampler Cache";
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 160;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__sampler_texel_misses__read;

      /* ---- L3 / data port counters ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SlmBytesRead";
      ctr->name = "SLM Bytes Read";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 168;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__slm_bytes_read__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "SlmBytesWritten";
      ctr->name = "SLM Bytes Written";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->category = "L3/Data Port/SLM";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 176;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__slm_bytes_written__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->name = "Shader Memory Accesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 184;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__shader_memory_accesses__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "ShaderAtomics";
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->category = "L3/Data Port/Atomics";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 192;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__shader_atomics__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->name = "L3 Shader Throughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->category = "L3/Data Port";
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 200;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__l3_shader_throughput__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "ShaderBarriers";
      ctr->name = "Shader Barrier Messages";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->category = "EU Array/Barrier";
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 208;
      ctr->oa_counter_read_uint64 = tglgt1__rasterizer_and_pixel_backend__shader_barriers__read;

      /* ---- Slice0 rasterizer counters, gated on slice_mask bit 0 ---- */

      if (perf->sys_vars.slice_mask & 1) {
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "Rasterizer0InputAvailable";
         ctr->name = "Slice0 Rasterizer Input Available";
         ctr->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         ctr->category = "GPU/Rasterizer";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 216;
         ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__rasterizer0_input_available__read;

         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "Rasterizer0OutputReady";
         ctr->name = "Slice0 Rasterizer Output Ready";
         ctr->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         ctr->category = "GPU/Rasterizer";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 220;
         ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
      }

      /* ---- Slice0 pipe0 pixel backend counters ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PixelData00Ready";
      ctr->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      ctr->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      ctr->category = "GPU/Rasterizer/Early Depth Test";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 224;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__pixel_data00_ready__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PSOutput00Available";
      ctr->name = "Slice0 Pipe0 PS Output Available";
      ctr->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      ctr->category = "GPU/3D Pipe";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 228;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__ps_output00_available__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PixelValues00Ready";
      ctr->name = "Slice0 Pipe0 Pixel Values Ready";
      ctr->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      ctr->category = "GPU/3D Pipe";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 232;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__pixel_values00_ready__read;

      /* ---- GTI request queue occupancy ---- */

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GTRequestQueue00Full";
      ctr->name = "SQ00 is full";
      ctr->desc = "The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->category = "GTI";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 236;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__gt_request_queue00_full__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GTRequestQueue01Full";
      ctr->name = "SQ01 is full";
      ctr->desc = "The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->category = "GTI";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 240;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__gt_request_queue01_full__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GTRequestQueue10Full";
      ctr->name = "SQ10 is full";
      ctr->desc = "The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->category = "GTI";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 244;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__gt_request_queue10_full__read;

      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GTRequestQueue11Full";
      ctr->name = "SQ11 is full";
      ctr->desc = "The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->category = "GTI";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 248;
      ctr->oa_counter_read_float = tglgt1__rasterizer_and_pixel_backend__gt_request_queue11_full__read;

      /* Total result size: end of the last counter appended above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Make the metric set discoverable by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "L3_1" OA metric set for tglgt1 and register it.
 *
 * A query is allocated with `perf` as its rzalloc context, together with an
 * array of 15 counter slots. The function then fills in the OA report
 * format, the accumulator offsets, the register programming tables (mux,
 * B-counter and flex) and the counter descriptions, and finally inserts the
 * query into perf->oa_metrics_table keyed by its GUID string.
 *
 * Thirteen counters are always described; the two "Slice0 L3 Bank{0,1}
 * Input Available" counters are only added when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
tglgt1_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are static const, so the pointers
    * stored in query->config below stay valid after this function returns.
    * Entry order is preserved exactly as generated. */
   static const struct intel_perf_query_register_prog mux_programming[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04002024 },
      { .reg = 0x00009888, .val = 0x0600282F },
      { .reg = 0x00009888, .val = 0x1C000027 },
      { .reg = 0x00009888, .val = 0x1E000026 },
      { .reg = 0x00009888, .val = 0x02000025 },
      { .reg = 0x00009888, .val = 0x0800002E },
      { .reg = 0x00009888, .val = 0x0A00002D },
      { .reg = 0x00009888, .val = 0x0C00002C },
      { .reg = 0x00009888, .val = 0x3A000000 },
      { .reg = 0x00009888, .val = 0x34000000 },
      { .reg = 0x00009888, .val = 0x36000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x5D101400 },
      { .reg = 0x00009888, .val = 0x5B100555 },
      { .reg = 0x00009888, .val = 0x1D140030 },
      { .reg = 0x00009888, .val = 0x13148000 },
      { .reg = 0x00009888, .val = 0x15148000 },
      { .reg = 0x00009888, .val = 0x17148000 },
      { .reg = 0x00009888, .val = 0x1B141C00 },
      { .reg = 0x00009888, .val = 0x61111400 },
      { .reg = 0x00009888, .val = 0x5F110555 },
      { .reg = 0x00009888, .val = 0x1D128000 },
      { .reg = 0x00009888, .val = 0x1F128000 },
      { .reg = 0x00009888, .val = 0x03128000 },
      { .reg = 0x00009888, .val = 0x05128000 },
      { .reg = 0x00009888, .val = 0x07128000 },
      { .reg = 0x00009888, .val = 0x09128000 },
      { .reg = 0x00009888, .val = 0x0B128000 },
      { .reg = 0x00009888, .val = 0x0D128000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x55100000 },
      { .reg = 0x00009888, .val = 0x57100000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B100000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_programming[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_programming[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->guid = "4a46a4ee-437d-4726-ac61-31e47072f706";
   query->name = "L3_1";
   query->symbol_name = "L3_1";
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Counter storage: 13 unconditional + 2 slice-dependent entries. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;

   /* Accumulation buffer offsets: each section starts right after the
    * previous one (1 GPU time slot, 1 GPU clock slot, then 36, 8, 8 and 2
    * slots for the A, B, C and perfcnt sections respectively). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The description is only filled in while data_size is still zero.
    * NOTE(review): the query was allocated just above with rzalloc(), which
    * presumably zero-fills, so this guard is expected to pass on every
    * call — confirm against the ralloc implementation. */
   if (!query->data_size) {
      query->config.mux_regs = mux_programming;
      query->config.n_mux_regs = ARRAY_SIZE(mux_programming);

      query->config.b_counter_regs = b_counter_programming;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_programming);

      query->config.flex_regs = flex_programming;
      query->config.n_flex_regs = ARRAY_SIZE(flex_programming);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here whose maximum is
       * computed from perf rather than being a literal. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = tglgt1__l3_1__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = tglgt1__l3_1__gpu_busy__read;

      /* EuActive */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 28;
      ctr->oa_counter_read_float = tglgt1__l3_1__eu_active__read;

      /* EuStall */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 32;
      ctr->oa_counter_read_float = tglgt1__l3_1__eu_stall__read;

      /* EuThreadOccupancy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 36;
      ctr->oa_counter_read_float = tglgt1__l3_1__eu_thread_occupancy__read;

      /* VsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__vs_threads__read;

      /* HsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__hs_threads__read;

      /* DsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__ds_threads__read;

      /* GsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__gs_threads__read;

      /* PsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__ps_threads__read;

      /* CsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;
      ctr->oa_counter_read_uint64 = tglgt1__l3_1__cs_threads__read;

      /* Both slice0 L3 bank counters share the same condition: bit 0 of
       * the slice mask must be set. */
      if (perf->sys_vars.slice_mask & 1) {
         /* L30Bank0InputAvailable */
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "L30Bank0InputAvailable";
         ctr->name = "Slice0 L3 Bank0 Input Available";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 88;
         ctr->oa_counter_read_float = tglgt1__l3_1__l30_bank0_input_available__read;

         /* L30Bank1InputAvailable */
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "L30Bank1InputAvailable";
         ctr->name = "Slice0 L3 Bank1 Input Available";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = tglgt1__l3_1__l30_bank1_input_available__read;
      }

      /* ctr is the last counter added, so the result size is the end of
       * that counter's storage. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "L3_2" OA metric set for tglgt1 and register it.
 *
 * A query is allocated with `perf` as its rzalloc context, together with an
 * array of 15 counter slots. The function then fills in the OA report
 * format, the accumulator offsets, the register programming tables (mux,
 * B-counter and flex) and the counter descriptions, and finally inserts the
 * query into perf->oa_metrics_table keyed by its GUID string.
 *
 * Thirteen counters are always described; the two "Slice0 L3 Bank{2,3}
 * Input Available" counters are only added when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
tglgt1_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They are static const, so the pointers
    * stored in query->config below stay valid after this function returns.
    * Entry order is preserved exactly as generated. */
   static const struct intel_perf_query_register_prog mux_programming[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04003000 },
      { .reg = 0x00009888, .val = 0x06003800 },
      { .reg = 0x00009888, .val = 0x00000024 },
      { .reg = 0x00009888, .val = 0x0E000025 },
      { .reg = 0x00009888, .val = 0x10000026 },
      { .reg = 0x00009888, .val = 0x12000027 },
      { .reg = 0x00009888, .val = 0x1400002C },
      { .reg = 0x00009888, .val = 0x1600002D },
      { .reg = 0x00009888, .val = 0x1800002E },
      { .reg = 0x00009888, .val = 0x1A00002F },
      { .reg = 0x00009888, .val = 0x3A000000 },
      { .reg = 0x00009888, .val = 0x34000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B105000 },
      { .reg = 0x00009888, .val = 0x5D100155 },
      { .reg = 0x00009888, .val = 0x11148000 },
      { .reg = 0x00009888, .val = 0x1B14E000 },
      { .reg = 0x00009888, .val = 0x1D14000F },
      { .reg = 0x00009888, .val = 0x4B112000 },
      { .reg = 0x00009888, .val = 0x5F115000 },
      { .reg = 0x00009888, .val = 0x61110155 },
      { .reg = 0x00009888, .val = 0x01128000 },
      { .reg = 0x00009888, .val = 0x0F128000 },
      { .reg = 0x00009888, .val = 0x11128000 },
      { .reg = 0x00009888, .val = 0x13128000 },
      { .reg = 0x00009888, .val = 0x15128000 },
      { .reg = 0x00009888, .val = 0x17128000 },
      { .reg = 0x00009888, .val = 0x19128000 },
      { .reg = 0x00009888, .val = 0x1B128000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x4F100000 },
      { .reg = 0x00009888, .val = 0x51100000 },
      { .reg = 0x00009888, .val = 0x53100000 },
      { .reg = 0x00009888, .val = 0x55100000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_programming[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_programming[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->guid = "2456d8be-5c99-430c-8172-8294fdc39331";
   query->name = "L3_2";
   query->symbol_name = "L3_2";
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Counter storage: 13 unconditional + 2 slice-dependent entries. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;

   /* Accumulation buffer offsets: each section starts right after the
    * previous one (1 GPU time slot, 1 GPU clock slot, then 36, 8, 8 and 2
    * slots for the A, B, C and perfcnt sections respectively). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The description is only filled in while data_size is still zero.
    * NOTE(review): the query was allocated just above with rzalloc(), which
    * presumably zero-fills, so this guard is expected to pass on every
    * call — confirm against the ralloc implementation. */
   if (!query->data_size) {
      query->config.mux_regs = mux_programming;
      query->config.n_mux_regs = ARRAY_SIZE(mux_programming);

      query->config.b_counter_regs = b_counter_programming;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_programming);

      query->config.flex_regs = flex_programming;
      query->config.n_flex_regs = ARRAY_SIZE(flex_programming);

      /* GpuTime */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__gpu_time__read;

      /* GpuCoreClocks */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: the only counter here whose maximum is
       * computed from perf rather than being a literal. */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = tglgt1__l3_2__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__avg_gpu_core_frequency__read;

      /* GpuBusy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = tglgt1__l3_2__gpu_busy__read;

      /* VsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__vs_threads__read;

      /* HsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__hs_threads__read;

      /* DsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__ds_threads__read;

      /* GsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__gs_threads__read;

      /* PsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__ps_threads__read;

      /* CsThreads */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt1__l3_2__cs_threads__read;

      /* EuActive */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = tglgt1__l3_2__eu_active__read;

      /* EuStall */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = tglgt1__l3_2__eu_stall__read;

      /* EuThreadOccupancy */
      ctr = query->counters + query->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = tglgt1__l3_2__eu_thread_occupancy__read;

      /* Both slice0 L3 bank counters share the same condition: bit 0 of
       * the slice mask must be set. */
      if (perf->sys_vars.slice_mask & 1) {
         /* L30Bank2InputAvailable */
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "L30Bank2InputAvailable";
         ctr->name = "Slice0 L3 Bank2 Input Available";
         ctr->desc = "The percentage of time in which slice0 L3 bank2 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = tglgt1__l3_2__l30_bank2_input_available__read;

         /* L30Bank3InputAvailable */
         ctr = query->counters + query->n_counters++;
         ctr->symbol_name = "L30Bank3InputAvailable";
         ctr->name = "Slice0 L3 Bank3 Input Available";
         ctr->desc = "The percentage of time in which slice0 L3 bank3 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 96;
         ctr->oa_counter_read_float = tglgt1__l3_2__l30_bank3_input_available__read;
      }

      /* ctr is the last counter added, so the result size is the end of
       * that counter's storage. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_3";
   query->symbol_name = "L3_3";
   query->guid = "71e2a606-810c-4252-bf7e-16fed118f4b5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04000200 },
         { .reg = 0x00009888, .val = 0x06000000 },
         { .reg = 0x00009888, .val = 0x0A000020 },
         { .reg = 0x00009888, .val = 0x0C000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100500 },
         { .reg = 0x00009888, .val = 0x1B141800 },
         { .reg = 0x00009888, .val = 0x5F110500 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__l3_3__l30_bank0_output_ready__read;
         counter->name = "Slice0 L3 Bank0 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank0 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank0OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_4";
   query->symbol_name = "L3_4";
   query->guid = "327a91a5-f93c-449d-b5e1-ab92cb1180a1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04000A00 },
         { .reg = 0x00009888, .val = 0x06000820 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100050 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B140400 },
         { .reg = 0x00009888, .val = 0x5F110050 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_4__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__l3_4__l30_bank1_output_ready__read;
         counter->name = "Slice0 L3 Bank1 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank1 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank1OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_5";
   query->symbol_name = "L3_5";
   query->guid = "a8de84ca-1624-4e0c-8046-d5265566c15b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04001200 },
         { .reg = 0x00009888, .val = 0x06001020 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100050 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B140400 },
         { .reg = 0x00009888, .val = 0x5F110050 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__l3_5__l30_bank2_output_ready__read;
         counter->name = "Slice0 L3 Bank2 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank2 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank2OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_l3_6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_6";
   query->symbol_name = "L3_6";
   query->guid = "6f376729-c56a-4560-a0eb-f4a17ec6df84";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04001A00 },
         { .reg = 0x00009888, .val = 0x06001820 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100050 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B140400 },
         { .reg = 0x00009888, .val = 0x5F110050 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__l3_6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__l3_6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_6__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_6__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__l3_6__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__l3_6__l30_bank3_output_ready__read;
         counter->name = "Slice0 L3 Bank3 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank3 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank3OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "4876283b-1889-4cef-880f-58de9dc676cb";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0C123E00 },
         { .reg = 0x00009888, .val = 0x1C121600 },
         { .reg = 0x00009888, .val = 0x04143E00 },
         { .reg = 0x00009888, .val = 0x18141613 },
         { .reg = 0x00009888, .val = 0x0C323E00 },
         { .reg = 0x00009888, .val = 0x1C321600 },
         { .reg = 0x00009888, .val = 0x04343E00 },
         { .reg = 0x00009888, .val = 0x18341600 },
         { .reg = 0x00009888, .val = 0x0E120086 },
         { .reg = 0x00009888, .val = 0x1A120033 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x18124000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C13C200 },
         { .reg = 0x00009888, .val = 0x001400A6 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x0C140000 },
         { .reg = 0x00009888, .val = 0x18158000 },
         { .reg = 0x00009888, .val = 0x1C150800 },
         { .reg = 0x00009888, .val = 0x12320086 },
         { .reg = 0x00009888, .val = 0x16320033 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x10324000 },
         { .reg = 0x00009888, .val = 0x14324000 },
         { .reg = 0x00009888, .val = 0x1C333C00 },
         { .reg = 0x00009888, .val = 0x103400A6 },
         { .reg = 0x00009888, .val = 0x14340013 },
         { .reg = 0x00009888, .val = 0x08340000 },
         { .reg = 0x00009888, .val = 0x0A340000 },
         { .reg = 0x00009888, .val = 0x1C350088 },
         { .reg = 0x00009888, .val = 0x100D8000 },
         { .reg = 0x00009888, .val = 0x120D8000 },
         { .reg = 0x00009888, .val = 0x140D8000 },
         { .reg = 0x00009888, .val = 0x160D8000 },
         { .reg = 0x00009888, .val = 0x100E8000 },
         { .reg = 0x00009888, .val = 0x120E8000 },
         { .reg = 0x00009888, .val = 0x140E8000 },
         { .reg = 0x00009888, .val = 0x160E8000 },
         { .reg = 0x00009888, .val = 0x100F4000 },
         { .reg = 0x00009888, .val = 0x120F4000 },
         { .reg = 0x00009888, .val = 0x140F4000 },
         { .reg = 0x00009888, .val = 0x160F4000 },
         { .reg = 0x00009888, .val = 0x0000C000 },
         { .reg = 0x00009888, .val = 0x0E00C000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34003D40 },
         { .reg = 0x00009888, .val = 0x0801C000 },
         { .reg = 0x00009888, .val = 0x0A01C000 },
         { .reg = 0x00009888, .val = 0x10058000 },
         { .reg = 0x00009888, .val = 0x1C058000 },
         { .reg = 0x00009888, .val = 0x22050300 },
         { .reg = 0x00009888, .val = 0x040A4000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x100AC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x5D100155 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D14000F },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x61110155 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x47100400 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100008 },
         { .reg = 0x00009888, .val = 0x51100004 },
         { .reg = 0x00009888, .val = 0x53100404 },
         { .reg = 0x00009888, .val = 0x55100008 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x000F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__sampler_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 DualSubslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 DualSubslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__sampler_1__sampler00_output_ready__read;
         counter->name = "Slice0 DualSubslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__sampler_1__sampler01_output_ready__read;
         counter->name = "Slice0 DualSubslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the tglgt1 "TDL_1" OA metric set.
 *
 * Allocates a query descriptor with rzalloc (using perf as the allocation
 * context), attaches the mux / B-counter / flex register programs, describes
 * each counter exposed by the set, and finally inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * Up to 27 counters are registered: 13 unconditionally, plus 7 for each of
 * bits 0 and 1 of perf->sys_vars.subslice_mask that is set.
 *
 * This function is emitted by gen_perf.py (see the file header); register
 * addresses/values and counter offsets come from the generator's input and
 * should not be hand-edited here.
 */
static void
tglgt1_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "aeddd71b-2f93-48de-9e69-1cba5865473f";
   /* Capacity 27 = 13 always-present counters + 14 counters gated on
    * subslice_mask below; n_counters tracks how many are actually used. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 27);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets...
    *
    * These are indices into results->accumulator[] as used by the
    * *__read() helpers. They resolve to: gpu_time = 0, gpu_clock = 1,
    * a = 2, b = 38, c = 46, perfcnt = 54, rpstat = 56.
    * (The 36 / 8 / 8 strides presumably mirror the A32 + A4, B8 and C8
    * groups named in the OA format above -- confirm against the format
    * definition.) */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor over the counter array; it is re-pointed at each newly claimed
    * slot below, so this initial value is overwritten by the first
    * registration. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): the note above talks about a global variable, but query
    * is allocated at the top of this function and data_size is not written
    * before this check. Assuming rzalloc returns zeroed memory, the branch
    * is always taken -- confirm before relying on the guard. */
   if (!query->data_size) {
      /* The register tables are static const, so they have static storage
       * duration and the pointers stored in query->config remain valid
       * after this function returns. Entry order is preserved exactly as
       * generated. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x24110340 },
         { .reg = 0x00009888, .val = 0x2611001C },
         { .reg = 0x00009888, .val = 0x24310340 },
         { .reg = 0x00009888, .val = 0x2631001C },
         { .reg = 0x00009888, .val = 0x001100F3 },
         { .reg = 0x00009888, .val = 0x0E1100F2 },
         { .reg = 0x00009888, .val = 0x16110103 },
         { .reg = 0x00009888, .val = 0x06110107 },
         { .reg = 0x00009888, .val = 0x08110106 },
         { .reg = 0x00009888, .val = 0x0A110105 },
         { .reg = 0x00009888, .val = 0x0C110104 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x02110000 },
         { .reg = 0x00009888, .val = 0x04110000 },
         { .reg = 0x00009888, .val = 0x00128000 },
         { .reg = 0x00009888, .val = 0x0E128000 },
         { .reg = 0x00009888, .val = 0x16128000 },
         { .reg = 0x00009888, .val = 0x06128000 },
         { .reg = 0x00009888, .val = 0x08128000 },
         { .reg = 0x00009888, .val = 0x0A128000 },
         { .reg = 0x00009888, .val = 0x0C128000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C132300 },
         { .reg = 0x00009888, .val = 0x16138000 },
         { .reg = 0x00009888, .val = 0x18138000 },
         { .reg = 0x00009888, .val = 0x1A138000 },
         { .reg = 0x00009888, .val = 0x103100F3 },
         { .reg = 0x00009888, .val = 0x123100F2 },
         { .reg = 0x00009888, .val = 0x14310103 },
         { .reg = 0x00009888, .val = 0x1C310107 },
         { .reg = 0x00009888, .val = 0x1E310106 },
         { .reg = 0x00009888, .val = 0x02310105 },
         { .reg = 0x00009888, .val = 0x04310104 },
         { .reg = 0x00009888, .val = 0x08310000 },
         { .reg = 0x00009888, .val = 0x0A310000 },
         { .reg = 0x00009888, .val = 0x0E310000 },
         { .reg = 0x00009888, .val = 0x00310000 },
         { .reg = 0x00009888, .val = 0x10328000 },
         { .reg = 0x00009888, .val = 0x12328000 },
         { .reg = 0x00009888, .val = 0x14328000 },
         { .reg = 0x00009888, .val = 0x1C328000 },
         { .reg = 0x00009888, .val = 0x1E328000 },
         { .reg = 0x00009888, .val = 0x02328000 },
         { .reg = 0x00009888, .val = 0x04328000 },
         { .reg = 0x00009888, .val = 0x1C331C00 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x12338000 },
         { .reg = 0x00009888, .val = 0x14338000 },
         { .reg = 0x00009888, .val = 0x100D8000 },
         { .reg = 0x00009888, .val = 0x120D8000 },
         { .reg = 0x00009888, .val = 0x140D8000 },
         { .reg = 0x00009888, .val = 0x1C0D8000 },
         { .reg = 0x00009888, .val = 0x1E0D8000 },
         { .reg = 0x00009888, .val = 0x020D8000 },
         { .reg = 0x00009888, .val = 0x040D8000 },
         { .reg = 0x00009888, .val = 0x100E8000 },
         { .reg = 0x00009888, .val = 0x120E8000 },
         { .reg = 0x00009888, .val = 0x140E8000 },
         { .reg = 0x00009888, .val = 0x1C0E8000 },
         { .reg = 0x00009888, .val = 0x1E0E8000 },
         { .reg = 0x00009888, .val = 0x020E8000 },
         { .reg = 0x00009888, .val = 0x040E8000 },
         { .reg = 0x00009888, .val = 0x100F4000 },
         { .reg = 0x00009888, .val = 0x120F4000 },
         { .reg = 0x00009888, .val = 0x140F4000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x020F4000 },
         { .reg = 0x00009888, .val = 0x040F4000 },
         { .reg = 0x00009888, .val = 0x0000C000 },
         { .reg = 0x00009888, .val = 0x0E00C000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34004340 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x0600C000 },
         { .reg = 0x00009888, .val = 0x0800C000 },
         { .reg = 0x00009888, .val = 0x0A00C000 },
         { .reg = 0x00009888, .val = 0x0C00C000 },
         { .reg = 0x00009888, .val = 0x0801C000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x00018000 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x10058000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x22050080 },
         { .reg = 0x00009888, .val = 0x16058000 },
         { .reg = 0x00009888, .val = 0x18058000 },
         { .reg = 0x00009888, .val = 0x1A058000 },
         { .reg = 0x00009888, .val = 0x040A4000 },
         { .reg = 0x00009888, .val = 0x0A0AC000 },
         { .reg = 0x00009888, .val = 0x0E0A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080AC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101415 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D140033 },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111415 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47100400 },
         { .reg = 0x00009888, .val = 0x4D100404 },
         { .reg = 0x00009888, .val = 0x4F100004 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100004 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100404 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000000 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000000 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000000 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000000 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFEF },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFEF },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFDF },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFDF },
         { .reg = 0x0000D960, .val = 0x00078000 },
         { .reg = 0x0000D964, .val = 0x00000FFF },
         { .reg = 0x0000DC20, .val = 0x00078000 },
         { .reg = 0x0000DC24, .val = 0x00000FFF },
         { .reg = 0x0000D968, .val = 0x00007800 },
         { .reg = 0x0000D96C, .val = 0x0000F0FF },
         { .reg = 0x0000DC28, .val = 0x00007800 },
         { .reg = 0x0000DC2C, .val = 0x0000F0FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* --- Unconditional counters (13) ---
       *
       * Each registration claims the next free slot via n_counters++ and
       * fills it in. counter->offset advances by 8 after a UINT64 counter
       * and by 4 after a FLOAT counter, so it appears to be a byte position
       * within the query's result data (data_size below is derived from
       * it). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from perf rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Offset jumps from 24 to 32 even though the previous counter is a
       * FLOAT -- presumably padding so this UINT64 stays 8-byte aligned
       * (TODO confirm against gen_perf.py). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__tdl_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* --- Counters gated on subslice_mask (up to 14) ---
       *
       * Bit 0 (mask & 1) gates the "Slice0 DualSubslice0" counters (symbol
       * names containing "00"); bit 1 (mask & 2) gates the "DualSubslice1"
       * counters ("01"). A skipped counter takes no slot in
       * query->counters; the offsets assigned to the counters that are
       * registered stay fixed regardless of which others were skipped. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      /* Thread-header-ready counters for DualSubslice0, ports 0..3. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header00_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header00_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      /* Thread-header-ready counters for DualSubslice1, ports 0..3. */
      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header01_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header01_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      /* Thread-header-ready counters without a port suffix, one per
       * dual-subslice. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header00_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader00Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt1__tdl_1__thread_header01_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader01Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      /* counter now points at the last counter actually registered (gated
       * ones may have been skipped). Offsets increase in registration
       * order, so this yields the end of the highest-offset counter in
       * use. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query: key is the GUID string, value is the query. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "GpuBusyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "a4089c3f-a697-4213-a58b-70e150969226";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x040E0043 },
         { .reg = 0x00009888, .val = 0x0A0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x080F00A3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x040F4000 },
         { .reg = 0x00009888, .val = 0x0A0F4000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x0401C000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x15102400 },
         { .reg = 0x00009888, .val = 0x230B0120 },
         { .reg = 0x00009888, .val = 0x15182400 },
         { .reg = 0x00009888, .val = 0x232B0120 },
         { .reg = 0x00009888, .val = 0x17100023 },
         { .reg = 0x00009888, .val = 0x11100000 },
         { .reg = 0x00009888, .val = 0x5D101000 },
         { .reg = 0x00009888, .val = 0x5B100545 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x1B140C00 },
         { .reg = 0x00009888, .val = 0x61112000 },
         { .reg = 0x00009888, .val = 0x5F110945 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x13138000 },
         { .reg = 0x00009888, .val = 0x1B0D0040 },
         { .reg = 0x00009888, .val = 0x1F0B00D3 },
         { .reg = 0x00009888, .val = 0x190B0000 },
         { .reg = 0x00009888, .val = 0x170B0000 },
         { .reg = 0x00009888, .val = 0x1B170002 },
         { .reg = 0x00009888, .val = 0x0D174000 },
         { .reg = 0x00009888, .val = 0x071800A3 },
         { .reg = 0x00009888, .val = 0x11180000 },
         { .reg = 0x00009888, .val = 0x032D4000 },
         { .reg = 0x00009888, .val = 0x032B00D3 },
         { .reg = 0x00009888, .val = 0x192B0000 },
         { .reg = 0x00009888, .val = 0x092B0000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100009 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49101005 },
         { .reg = 0x00009888, .val = 0x4B100207 },
         { .reg = 0x00009888, .val = 0x4D100010 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00024002 },
         { .reg = 0x0000D944, .val = 0x0000B7FF },
         { .reg = 0x0000DC00, .val = 0x00024002 },
         { .reg = 0x0000DC04, .val = 0x0000B7FF },
         { .reg = 0x0000D948, .val = 0x0007F000 },
         { .reg = 0x0000D94C, .val = 0x000001FF },
         { .reg = 0x0000DC08, .val = 0x0007F000 },
         { .reg = 0x0000DC0C, .val = 0x000001FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__gpu_busyness__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__gpu_busyness__any_engine_busy__read;
      counter->name = "Any Engine Busy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity1";
   query->symbol_name = "EuActivity1";
   query->guid = "ffde7a80-6d78-42c8-8603-838de28d07da";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00810710 },
         { .reg = 0x0000E558, .val = 0x00A10910 },
         { .reg = 0x0000E658, .val = 0x00850750 },
         { .reg = 0x0000E758, .val = 0x00A50950 },
         { .reg = 0x0000E45C, .val = 0x00802702 },
         { .reg = 0x0000E55C, .val = 0x00A02902 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity1__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__vs_fpu_active__read;
      counter->name = "VS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__ps_fpu_active__read;
      counter->name = "PS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpuActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity1__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity2";
   query->symbol_name = "EuActivity2";
   query->guid = "bd3186a3-6e96-4c70-a5d9-65d4bea6f668";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00862762 },
         { .reg = 0x0000E558, .val = 0x00A62962 },
         { .reg = 0x0000E658, .val = 0x00860760 },
         { .reg = 0x0000E758, .val = 0x00A60960 },
         { .reg = 0x0000E45C, .val = 0x00861761 },
         { .reg = 0x0000E55C, .val = 0x00A61961 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity2__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__cs_em_active__read;
      counter->name = "CS EM Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsEmActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__cs_fpu_active__read;
      counter->name = "CS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity2__cs_send_active__read;
      counter->name = "CS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsSendActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity3";
   query->symbol_name = "EuActivity3";
   query->guid = "b59e975f-044c-4e0c-afcd-46aee1107b81";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00811711 },
         { .reg = 0x0000E558, .val = 0x00A11911 },
         { .reg = 0x0000E658, .val = 0x00851751 },
         { .reg = 0x0000E758, .val = 0x00A51951 },
         { .reg = 0x0000E45C, .val = 0x00852752 },
         { .reg = 0x0000E55C, .val = 0x00A52952 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity3__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__vs_em_active__read;
      counter->name = "VS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsEmActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__ps_em_active__read;
      counter->name = "PS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsEmActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity4";
   query->symbol_name = "EuActivity4";
   query->guid = "948f1627-f642-42a6-8304-801d7db112d2";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00820720 },
         { .reg = 0x0000E558, .val = 0x00A20920 },
         { .reg = 0x0000E658, .val = 0x00830730 },
         { .reg = 0x0000E758, .val = 0x00A30930 },
         { .reg = 0x0000E45C, .val = 0x00812712 },
         { .reg = 0x0000E55C, .val = 0x00A12912 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity4__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__hs_fpu_active__read;
      counter->name = "HS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsFpuActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__ds_fpu_active__read;
      counter->name = "DS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsFpuActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity5";
   query->symbol_name = "EuActivity5";
   query->guid = "243ffad2-626e-4d05-94a0-af7068e18a84";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00821721 },
         { .reg = 0x0000E558, .val = 0x00A21921 },
         { .reg = 0x0000E658, .val = 0x00831731 },
         { .reg = 0x0000E758, .val = 0x00A31931 },
         { .reg = 0x0000E45C, .val = 0x00822722 },
         { .reg = 0x0000E55C, .val = 0x00A22922 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity5__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__hs_em_active__read;
      counter->name = "HS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a hull shader instructions. Unit: percent.";
      counter->symbol_name = "HsEmActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__ds_em_active__read;
      counter->name = "DS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsEmActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity5__hs_send_active__read;
      counter->name = "HS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsSendActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity6";
   query->symbol_name = "EuActivity6";
   query->guid = "1f367b67-1f45-469b-97fd-eac88aeb1f7c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00840740 },
         { .reg = 0x0000E558, .val = 0x00A40940 },
         { .reg = 0x0000E658, .val = 0x00841741 },
         { .reg = 0x0000E758, .val = 0x00A41941 },
         { .reg = 0x0000E45C, .val = 0x00842742 },
         { .reg = 0x0000E55C, .val = 0x00A42942 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity6__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__gs_fpu_active__read;
      counter->name = "GS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsFpuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__gs_em_active__read;
      counter->name = "GS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsEmActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity6__gs_send_active__read;
      counter->name = "GS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a geometry shader instruction. Unit: percent.";
      counter->symbol_name = "GsSendActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity7_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity7";
   query->symbol_name = "EuActivity7";
   query->guid = "7c2db23c-1fb4-45df-a5d5-2345d23c10fc";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00803703 },
         { .reg = 0x0000E558, .val = 0x00A03903 },
         { .reg = 0x0000E658, .val = 0x00800700 },
         { .reg = 0x0000E758, .val = 0x00A00900 },
         { .reg = 0x0000E45C, .val = 0x00801701 },
         { .reg = 0x0000E55C, .val = 0x00A01901 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity7__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__fpu_active__read;
      counter->name = "EU FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "FpuActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__em_active__read;
      counter->name = "EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__eu_fpu_em_active__read;
      counter->name = "EU FPU And EM Pipes Active";
      counter->desc = "The percentage of time in which EU FPU and EM pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuEmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity7__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity7__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt1_register_eu_activity8_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity8";
   query->symbol_name = "EuActivity8";
   query->guid = "b4c81162-2c96-4cf1-86ae-ecff3d57ee4d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 16);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1C0F4000 },
         { .reg = 0x00009888, .val = 0x1E0F4000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x18018000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101313 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00832732 },
         { .reg = 0x0000E558, .val = 0x00A32932 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt1__eu_activity8__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity8__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity8__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity8__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity8__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt1__eu_activity8__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt1__eu_activity8__ds_send_active__read;
      counter->name = "DS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a domain shader instruction. Unit: percent.";
      counter->symbol_name = "DsSendActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the "TestOa" OA metric set for tglgt1.
 *
 * Builds a query description owned by \p perf: identity (name, symbol name,
 * GUID, OA report format), accumulation buffer offsets, the mux / B-counter /
 * flex register programming tables and 13 counters (GPU time, core clocks,
 * average core frequency and the HW test counters 0..9). The finished query
 * is inserted into perf->oa_metrics_table under its GUID string.
 *
 * \param perf  perf configuration that owns the new query and whose
 *              oa_metrics_table receives it.
 */
static void
tglgt1_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables; the query only keeps pointers to them. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x040F0000 },
      { .reg = 0x00009888, .val = 0x1E0F0017 },
      { .reg = 0x00009888, .val = 0x200F0000 },
      { .reg = 0x00009888, .val = 0x36000001 },
      { .reg = 0x00009888, .val = 0x1A012000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x49110000 },
      { .reg = 0x00009888, .val = 0x5D101400 },
      { .reg = 0x00009888, .val = 0x1D140020 },
      { .reg = 0x00009888, .val = 0x1D1103A3 },
      { .reg = 0x00009888, .val = 0x01110000 },
      { .reg = 0x00009888, .val = 0x61111000 },
      { .reg = 0x00009888, .val = 0x1F128000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x55100210 },
      { .reg = 0x00009888, .val = 0x57100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };
   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0xF0800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0xF0800000 },
      { .reg = 0x0000DC40, .val = 0x00FF0000 },
      { .reg = 0x0000D940, .val = 0x00000004 },
      { .reg = 0x0000D944, .val = 0x0000FFFF },
      { .reg = 0x0000DC00, .val = 0x00000004 },
      { .reg = 0x0000DC04, .val = 0x0000FFFF },
      { .reg = 0x0000D948, .val = 0x00000003 },
      { .reg = 0x0000D94C, .val = 0x0000FFFF },
      { .reg = 0x0000DC08, .val = 0x00000003 },
      { .reg = 0x0000DC0C, .val = 0x0000FFFF },
      { .reg = 0x0000D950, .val = 0x00000007 },
      { .reg = 0x0000D954, .val = 0x0000FFFF },
      { .reg = 0x0000DC10, .val = 0x00000007 },
      { .reg = 0x0000DC14, .val = 0x0000FFFF },
      { .reg = 0x0000D958, .val = 0x00100002 },
      { .reg = 0x0000D95C, .val = 0x0000FFF7 },
      { .reg = 0x0000DC18, .val = 0x00100002 },
      { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
      { .reg = 0x0000D960, .val = 0x00100002 },
      { .reg = 0x0000D964, .val = 0x0000FFCF },
      { .reg = 0x0000DC20, .val = 0x00100002 },
      { .reg = 0x0000DC24, .val = 0x0000FFCF },
      { .reg = 0x0000D968, .val = 0x00100082 },
      { .reg = 0x0000D96C, .val = 0x0000FFEF },
      { .reg = 0x0000DC28, .val = 0x00100082 },
      { .reg = 0x0000DC2C, .val = 0x0000FFEF },
      { .reg = 0x0000D970, .val = 0x001000C2 },
      { .reg = 0x0000D974, .val = 0x0000FFE7 },
      { .reg = 0x0000DC30, .val = 0x001000C2 },
      { .reg = 0x0000DC34, .val = 0x0000FFE7 },
      { .reg = 0x0000D978, .val = 0x00100001 },
      { .reg = 0x0000D97C, .val = 0x0000FFE7 },
      { .reg = 0x0000DC38, .val = 0x00100001 },
      { .reg = 0x0000DC3C, .val = 0x0000FFE7 },
   };
   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Metric set TestOa";
   info->symbol_name = "TestOa";
   info->guid = "6f27aaed-4f08-4e0f-95a9-231d9b4fa111";
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */

   /* Storage for the 13 counters filled in below. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 13);
   info->n_counters = 0;

   /* Accumulation buffer layout: every group begins where the previous
    * one ends. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* Only populate a query that has not been described yet (data_size is
    * set at the very end of this branch). */
   if (info->data_size == 0) {
      info->config.mux_regs = mux_prog;
      info->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      info->config.b_counter_regs = b_counter_prog;
      info->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      info->config.flex_regs = flex_prog;
      info->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* GpuTime */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__gpu_time__read;

      /* GpuCoreClocks */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__gpu_core_clocks__read;

      /* AvgGpuCoreFrequency: its maximum is computed from perf. */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = tglgt1__test_oa__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__avg_gpu_core_frequency__read;

      /* Counter0 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter0";
      ctr->name = "TestCounter0";
      ctr->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 24;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter0__read;

      /* Counter1 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter1";
      ctr->name = "TestCounter1";
      ctr->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter1__read;

      /* Counter2 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter2";
      ctr->name = "TestCounter2";
      ctr->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter2__read;

      /* Counter3 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter3";
      ctr->name = "TestCounter3";
      ctr->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter3__read;

      /* Counter4 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter4";
      ctr->name = "TestCounter4";
      ctr->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter4__read;

      /* Counter5 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter5";
      ctr->name = "TestCounter5";
      ctr->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter5__read;

      /* Counter6 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter6";
      ctr->name = "TestCounter6";
      ctr->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter6__read;

      /* Counter7 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter7";
      ctr->name = "TestCounter7";
      ctr->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter7__read;

      /* Counter8 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter8";
      ctr->name = "TestCounter8";
      ctr->desc = "HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 88;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter8__read;

      /* Counter9 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter9";
      ctr->name = "TestCounter9 - OAR enable";
      ctr->desc = "HW test counter 9. Should be equal to 1 in query. Unit: events.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 96;
      ctr->oa_counter_read_uint64 = tglgt1__test_oa__counter9__read;

      /* ctr is the last counter added: total size is its offset plus its
       * own size. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the metric set under its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

/**
 * Register every tglgt1 OA metric set with \p perf.
 *
 * Calls each tglgt1_register_<set>_counter_query() helper once, in the order
 * listed below, passing \p perf through unchanged.
 *
 * \param perf  perf configuration handed to every per-set register function.
 */
void
intel_oa_register_queries_tglgt1(struct intel_perf_config *perf)
{
/* Expands to a call of tglgt1_register_<set>_counter_query(perf). */
#define TGLGT1_REGISTER(set) tglgt1_register_##set##_counter_query(perf)

   TGLGT1_REGISTER(render_basic);
   TGLGT1_REGISTER(compute_basic);
   TGLGT1_REGISTER(render_pipe_profile);
   TGLGT1_REGISTER(hdc_and_sf);
   TGLGT1_REGISTER(rasterizer_and_pixel_backend);
   TGLGT1_REGISTER(l3_1);
   TGLGT1_REGISTER(l3_2);
   TGLGT1_REGISTER(l3_3);
   TGLGT1_REGISTER(l3_4);
   TGLGT1_REGISTER(l3_5);
   TGLGT1_REGISTER(l3_6);
   TGLGT1_REGISTER(sampler_1);
   TGLGT1_REGISTER(tdl_1);
   TGLGT1_REGISTER(gpu_busyness);
   TGLGT1_REGISTER(eu_activity1);
   TGLGT1_REGISTER(eu_activity2);
   TGLGT1_REGISTER(eu_activity3);
   TGLGT1_REGISTER(eu_activity4);
   TGLGT1_REGISTER(eu_activity5);
   TGLGT1_REGISTER(eu_activity6);
   TGLGT1_REGISTER(eu_activity7);
   TGLGT1_REGISTER(eu_activity8);
   TGLGT1_REGISTER(test_oa);

#undef TGLGT1_REGISTER
}


/* Helper for tglgt2_register_render_basic_counter_query() only.
 *
 * Claims the next free slot in query->counters (bumping query->n_counters),
 * leaves `counter` pointing at it and fills in its description.  Expects
 * locals named `query` and `counter` in the expanding scope.  The fields are
 * written in the order: read callback, name, desc, symbol_name, category,
 * type, data_type, units, raw_max, offset.
 */
#define TGLGT2_RB_COUNTER(READ_MEMBER, READ_FN, DATA_TYPE,                   \
                          NAME, DESC, SYMBOL, CATEGORY,                      \
                          TYPE, UNITS, RAW_MAX, OFFSET)                      \
   do {                                                                      \
      counter = &query->counters[query->n_counters++];                       \
      counter->READ_MEMBER = (READ_FN);                                      \
      counter->name = (NAME);                                                \
      counter->desc = (DESC);                                                \
      counter->symbol_name = (SYMBOL);                                       \
      counter->category = (CATEGORY);                                        \
      counter->type = (TYPE);                                                \
      counter->data_type = (DATA_TYPE);                                      \
      counter->units = (UNITS);                                              \
      counter->raw_max = (RAW_MAX);                                          \
      counter->offset = (OFFSET);                                            \
   } while (0)

/* Counter read through oa_counter_read_uint64, data type UINT64. */
#define TGLGT2_RB_UINT64(READ_FN, NAME, DESC, SYMBOL, CATEGORY,              \
                         TYPE, UNITS, RAW_MAX, OFFSET)                       \
   TGLGT2_RB_COUNTER(oa_counter_read_uint64, READ_FN,                        \
                     INTEL_PERF_COUNTER_DATA_TYPE_UINT64,                    \
                     NAME, DESC, SYMBOL, CATEGORY,                           \
                     TYPE, UNITS, RAW_MAX, OFFSET)

/* Counter read through oa_counter_read_float, data type FLOAT. */
#define TGLGT2_RB_FLOAT(READ_FN, NAME, DESC, SYMBOL, CATEGORY,               \
                        TYPE, UNITS, RAW_MAX, OFFSET)                        \
   TGLGT2_RB_COUNTER(oa_counter_read_float, READ_FN,                         \
                     INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,                     \
                     NAME, DESC, SYMBOL, CATEGORY,                           \
                     TYPE, UNITS, RAW_MAX, OFFSET)

/* Builds the tglgt2 "Render Metrics Basic set" (RenderBasic) OA query
 * description and inserts it into perf->oa_metrics_table, keyed by its GUID.
 *
 * The query is allocated with `perf` as its ralloc parent and its counter
 * array (34 slots) with the query as parent.  The four sampler counters are
 * only added when bit 0 of perf->sys_vars.subslice_mask is set; the byte
 * offsets of all counters are fixed regardless of whether those are present.
 */
static void
tglgt2_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs published as query->config.mux_regs. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x14150001 },
      { .reg = 0x00009888, .val = 0x16150020 },
      { .reg = 0x00009888, .val = 0x00124000 },
      { .reg = 0x00009888, .val = 0x0E124000 },
      { .reg = 0x00009888, .val = 0x10124000 },
      { .reg = 0x00009888, .val = 0x12124000 },
      { .reg = 0x00009888, .val = 0x10138000 },
      { .reg = 0x00009888, .val = 0x1C130E00 },
      { .reg = 0x00009888, .val = 0x00150050 },
      { .reg = 0x00009888, .val = 0x06157000 },
      { .reg = 0x00009888, .val = 0x08157151 },
      { .reg = 0x00009888, .val = 0x10150000 },
      { .reg = 0x00009888, .val = 0x18150000 },
      { .reg = 0x00009888, .val = 0x1C150000 },
      { .reg = 0x00009888, .val = 0x18004000 },
      { .reg = 0x00009888, .val = 0x36000490 },
      { .reg = 0x00009888, .val = 0x1C058000 },
      { .reg = 0x00009888, .val = 0x2405002A },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x05151D47 },
      { .reg = 0x00009888, .val = 0x09151536 },
      { .reg = 0x00009888, .val = 0x05351C00 },
      { .reg = 0x00009888, .val = 0x09351400 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B1050BB },
      { .reg = 0x00009888, .val = 0x5D102C01 },
      { .reg = 0x00009888, .val = 0x11148000 },
      { .reg = 0x00009888, .val = 0x1B14E000 },
      { .reg = 0x00009888, .val = 0x4B112000 },
      { .reg = 0x00009888, .val = 0x5F115000 },
      { .reg = 0x00009888, .val = 0x61110001 },
      { .reg = 0x00009888, .val = 0x01128000 },
      { .reg = 0x00009888, .val = 0x0F128000 },
      { .reg = 0x00009888, .val = 0x11128000 },
      { .reg = 0x00009888, .val = 0x13128000 },
      { .reg = 0x00009888, .val = 0x1F150137 },
      { .reg = 0x00009888, .val = 0x01150000 },
      { .reg = 0x00009888, .val = 0x0F168000 },
      { .reg = 0x00009888, .val = 0x03164000 },
      { .reg = 0x00009888, .val = 0x05164000 },
      { .reg = 0x00009888, .val = 0x1D350137 },
      { .reg = 0x00009888, .val = 0x03350147 },
      { .reg = 0x00009888, .val = 0x07350136 },
      { .reg = 0x00009888, .val = 0x01350000 },
      { .reg = 0x00009888, .val = 0x0F364000 },
      { .reg = 0x00009888, .val = 0x01368000 },
      { .reg = 0x00009888, .val = 0x03368000 },
      { .reg = 0x00009888, .val = 0x47103000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x4F100000 },
      { .reg = 0x00009888, .val = 0x51100000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009888, .val = 0x55103130 },
      { .reg = 0x00009888, .val = 0x57100001 },
      { .reg = 0x00009888, .val = 0x49103131 },
      { .reg = 0x00009888, .val = 0x4B100131 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   /* Register/value pairs published as query->config.b_counter_regs. */
   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x30800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00030000 },
      { .reg = 0x0000D940, .val = 0x00000018 },
      { .reg = 0x0000D944, .val = 0x0000FFFC },
      { .reg = 0x0000DC00, .val = 0x00000018 },
      { .reg = 0x0000DC04, .val = 0x0000FFFC },
      { .reg = 0x0000D948, .val = 0x00000060 },
      { .reg = 0x0000D94C, .val = 0x0000FFF3 },
      { .reg = 0x0000DC08, .val = 0x00000060 },
      { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
   };

   /* Register/value pairs published as query->config.flex_regs. */
   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *query =
      rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "0fc397c0-4833-492c-9ccd-4929d574d5b8";

   /* Room for all 34 counters, including the conditional sampler ones. */
   query->counters =
      rzalloc_array(query, struct intel_perf_query_counter, 34);
   query->n_counters = 0;

   /* The metric set id is determined at runtime, via sysfs. */
   query->oa_metrics_set_id = 0;
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets; each section starts where the previous
    * one ends.
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The query definition is assumed not to vary between contexts, so it is
    * only populated while data_size is still unset.
    * NOTE(review): the query was just allocated with rzalloc(), so this
    * guard presumably always holds here - confirm.
    */
   if (!query->data_size) {
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* GPU-wide timing counters. */
      TGLGT2_RB_UINT64(tglgt2__render_basic__gpu_time__read,
         "GPU Time Elapsed",
         "Time elapsed on the GPU during the measurement. Unit: ns.",
         "GpuTime",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_UNITS_NS,
         0 /* undefined */,
         0);

      TGLGT2_RB_UINT64(tglgt2__render_basic__gpu_core_clocks__read,
         "GPU Core Clocks",
         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         "GpuCoreClocks",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_CYCLES,
         0 /* undefined */,
         8);

      TGLGT2_RB_UINT64(tglgt2__render_basic__avg_gpu_core_frequency__read,
         "AVG GPU Core Frequency",
         "Average GPU Core Frequency in the measurement. Unit: Hz.",
         "AvgGpuCoreFrequency",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_HZ,
         tglgt2__render_basic__avg_gpu_core_frequency__max(perf),
         16);

      /* Per-stage thread dispatch counters. */
      TGLGT2_RB_UINT64(tglgt2__render_basic__vs_threads__read,
         "VS Threads Dispatched",
         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         "VsThreads",
         "EU Array/Vertex Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         24);

      TGLGT2_RB_UINT64(tglgt2__render_basic__hs_threads__read,
         "HS Threads Dispatched",
         "The total number of hull shader hardware threads dispatched. Unit: threads.",
         "HsThreads",
         "EU Array/Hull Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         32);

      TGLGT2_RB_UINT64(tglgt2__render_basic__ds_threads__read,
         "DS Threads Dispatched",
         "The total number of domain shader hardware threads dispatched. Unit: threads.",
         "DsThreads",
         "EU Array/Domain Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         40);

      TGLGT2_RB_UINT64(tglgt2__render_basic__gs_threads__read,
         "GS Threads Dispatched",
         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         "GsThreads",
         "EU Array/Geometry Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         48);

      TGLGT2_RB_UINT64(tglgt2__render_basic__ps_threads__read,
         "FS Threads Dispatched",
         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         "PsThreads",
         "EU Array/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         56);

      TGLGT2_RB_UINT64(tglgt2__render_basic__cs_threads__read,
         "CS Threads Dispatched",
         "The total number of compute shader hardware threads dispatched. Unit: threads.",
         "CsThreads",
         "EU Array/Compute Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         64);

      /* Percentage counters; these are floats placed 4 bytes apart. */
      TGLGT2_RB_FLOAT(tglgt2__render_basic__gpu_busy__read,
         "GPU Busy",
         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
         "GpuBusy",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         72);

      TGLGT2_RB_FLOAT(tglgt2__render_basic__eu_active__read,
         "EU Active",
         "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
         "EuActive",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         76);

      TGLGT2_RB_FLOAT(tglgt2__render_basic__eu_stall__read,
         "EU Stall",
         "The percentage of time in which the Execution Units were stalled. Unit: percent.",
         "EuStall",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         80);

      TGLGT2_RB_FLOAT(tglgt2__render_basic__eu_thread_occupancy__read,
         "EU Thread Occupancy",
         "The percentage of time in which hardware threads occupied EUs. Unit: percent.",
         "EuThreadOccupancy",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         84);

      /* Sampler counters are only exposed when bit 0 of the subslice mask
       * is set.  Their byte offsets (88..100) stay reserved either way.
       */
      if (perf->sys_vars.subslice_mask & 1) {
         TGLGT2_RB_FLOAT(tglgt2__render_basic__sampler00_busy__read,
            "Sampler00 Busy",
            "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.",
            "Sampler00Busy",
            "Sampler",
            INTEL_PERF_COUNTER_TYPE_RAW,
            INTEL_PERF_COUNTER_UNITS_PERCENT,
            100.0,
            88);

         TGLGT2_RB_FLOAT(tglgt2__render_basic__sampler00_bottleneck__read,
            "Sampler Slice0 Dualsubslice0 is bottleneck",
            "The percentage of time when sampler slice0 dualsubslice0 is bottleneck Unit: percent.",
            "Sampler00Bottleneck",
            "GPU/Sampler",
            INTEL_PERF_COUNTER_TYPE_RAW,
            INTEL_PERF_COUNTER_UNITS_PERCENT,
            100.0,
            92);

         TGLGT2_RB_FLOAT(tglgt2__render_basic__samplers_busy__read,
            "Samplers Busy",
            "The percentage of time in which samplers have been processing EU requests. Unit: percent.",
            "SamplersBusy",
            "Sampler",
            INTEL_PERF_COUNTER_TYPE_RAW,
            INTEL_PERF_COUNTER_UNITS_PERCENT,
            100.0,
            96);

         TGLGT2_RB_FLOAT(tglgt2__render_basic__sampler_bottleneck__read,
            "Samplers Bottleneck",
            "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.",
            "SamplerBottleneck",
            "Sampler",
            INTEL_PERF_COUNTER_TYPE_RAW,
            INTEL_PERF_COUNTER_UNITS_PERCENT,
            100.0,
            100);
      }

      /* Pixel / sample pipeline counters. */
      TGLGT2_RB_UINT64(tglgt2__render_basic__rasterized_pixels__read,
         "Rasterized Pixels",
         "The total number of rasterized pixels. Unit: pixels.",
         "RasterizedPixels",
         "3D Pipe/Rasterizer",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         104);

      TGLGT2_RB_UINT64(tglgt2__render_basic__hi_depth_test_fails__read,
         "Early Hi-Depth Test Fails",
         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         "HiDepthTestFails",
         "3D Pipe/Rasterizer/Hi-Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         112);

      TGLGT2_RB_UINT64(tglgt2__render_basic__early_depth_test_fails__read,
         "Early Depth Test Fails",
         "The total number of pixels dropped on early depth test. Unit: pixels.",
         "EarlyDepthTestFails",
         "3D Pipe/Rasterizer/Early Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         120);

      TGLGT2_RB_UINT64(tglgt2__render_basic__samples_killed_in_ps__read,
         "Samples Killed in FS",
         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         "SamplesKilledInPs",
         "3D Pipe/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         128);

      TGLGT2_RB_UINT64(tglgt2__render_basic__pixels_failing_post_ps_tests__read,
         "Pixels Failing Tests",
         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         "PixelsFailingPostPsTests",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         136);

      TGLGT2_RB_UINT64(tglgt2__render_basic__samples_written__read,
         "Samples Written",
         "The total number of samples or pixels written to all render targets. Unit: pixels.",
         "SamplesWritten",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         144);

      TGLGT2_RB_UINT64(tglgt2__render_basic__samples_blended__read,
         "Samples Blended",
         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         "SamplesBlended",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         152);

      /* Sampler texel counters. */
      TGLGT2_RB_UINT64(tglgt2__render_basic__sampler_texels__read,
         "Sampler Texels",
         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         "SamplerTexels",
         "Sampler/Sampler Input",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */,
         160);

      TGLGT2_RB_UINT64(tglgt2__render_basic__sampler_texel_misses__read,
         "Sampler Texels Misses",
         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         "SamplerTexelMisses",
         "Sampler/Sampler Cache",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */,
         168);

      /* SLM, L3 data port, barrier and GTI counters. */
      TGLGT2_RB_UINT64(tglgt2__render_basic__slm_bytes_read__read,
         "SLM Bytes Read",
         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         "SlmBytesRead",
         "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         176);

      TGLGT2_RB_UINT64(tglgt2__render_basic__slm_bytes_written__read,
         "SLM Bytes Written",
         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         "SlmBytesWritten",
         "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         184);

      TGLGT2_RB_UINT64(tglgt2__render_basic__shader_memory_accesses__read,
         "Shader Memory Accesses",
         "The total number of shader memory accesses to L3. Unit: messages.",
         "ShaderMemoryAccesses",
         "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         192);

      TGLGT2_RB_UINT64(tglgt2__render_basic__shader_atomics__read,
         "Shader Atomic Memory Accesses",
         "The total number of shader atomic memory accesses. Unit: messages.",
         "ShaderAtomics",
         "L3/Data Port/Atomics",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         200);

      TGLGT2_RB_UINT64(tglgt2__render_basic__l3_shader_throughput__read,
         "L3 Shader Throughput",
         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         "L3ShaderThroughput",
         "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         208);

      TGLGT2_RB_UINT64(tglgt2__render_basic__shader_barriers__read,
         "Shader Barrier Messages",
         "The total number of shader barrier messages. Unit: messages.",
         "ShaderBarriers",
         "EU Array/Barrier",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         216);

      TGLGT2_RB_UINT64(tglgt2__render_basic__gti_read_throughput__read,
         "GTI Read Throughput",
         "The total number of GPU memory bytes read from GTI. Unit: bytes.",
         "GtiReadThroughput",
         "GTI",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         224);

      TGLGT2_RB_UINT64(tglgt2__render_basic__gti_write_throughput__read,
         "GTI Write Throughput",
         "The total number of GPU memory bytes written to GTI. Unit: bytes.",
         "GtiWriteThroughput",
         "GTI",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         232);

      /* `counter` still points at the last counter added above, so the
       * result size is its offset plus its own size.
       */
      query->data_size =
         counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

#undef TGLGT2_RB_FLOAT
#undef TGLGT2_RB_UINT64
#undef TGLGT2_RB_COUNTER


/* Registers the TGL GT2 "Compute Metrics Basic set" OA query.
 *
 * A query description is allocated with `perf` as its ralloc context, its
 * mux / b-counter / flex register programming tables are attached, the 30
 * counter descriptors are filled in (in increasing result-buffer offset
 * order), and the finished query is inserted into perf->oa_metrics_table
 * under its GUID string.
 *
 * perf: the perf configuration that owns the query and its metrics table.
 */
static void
tglgt2_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables.  They are static const, so the query
    * only stores pointers to them. */
   static const struct intel_perf_query_register_prog compute_basic_mux[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x05151D37 },
      { .reg = 0x00009888, .val = 0x09151547 },
      { .reg = 0x00009888, .val = 0x05351C00 },
      { .reg = 0x00009888, .val = 0x09351400 },
      { .reg = 0x00009888, .val = 0x5B100BBB },
      { .reg = 0x00009888, .val = 0x0D150136 },
      { .reg = 0x00009888, .val = 0x01150000 },
      { .reg = 0x00009888, .val = 0x03164000 },
      { .reg = 0x00009888, .val = 0x05164000 },
      { .reg = 0x00009888, .val = 0x07164000 },
      { .reg = 0x00009888, .val = 0x03350137 },
      { .reg = 0x00009888, .val = 0x07350147 },
      { .reg = 0x00009888, .val = 0x0B350136 },
      { .reg = 0x00009888, .val = 0x01350000 },
      { .reg = 0x00009888, .val = 0x01368000 },
      { .reg = 0x00009888, .val = 0x03368000 },
      { .reg = 0x00009888, .val = 0x05368000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B100000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog compute_basic_b_counter[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   /* NOTE(review): the RenderPipeProfile set registered right after this
    * function programs only the first six of these flex registers; confirm
    * that the trailing 0xE65C entry is intended for this set. */
   static const struct intel_perf_query_register_prog compute_basic_flex[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "397a46d9-03dd-4696-8196-270362e1c575";

   /* Room for exactly the 30 counters described below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each section starts where the
    * previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *slot = query->counters;

   /* The definition of a query is assumed not to vary between contexts, so
    * describing it once is sufficient.  data_size doubles as the "already
    * described" marker; for the freshly allocated query above it is
    * presumably still zero (rzalloc is expected to zero-fill - confirm). */
   if (!query->data_size) {
      query->config.mux_regs = compute_basic_mux;
      query->config.n_mux_regs = ARRAY_SIZE(compute_basic_mux);

      query->config.b_counter_regs = compute_basic_b_counter;
      query->config.n_b_counter_regs = ARRAY_SIZE(compute_basic_b_counter);

      query->config.flex_regs = compute_basic_flex;
      query->config.n_flex_regs = ARRAY_SIZE(compute_basic_flex);

      /* Counter descriptors.  Each one claims the next free slot of
       * query->counters; fields not named in the initializer stay zero,
       * matching the zero-filled array (assumes rzalloc_array zero-fills -
       * confirm).
       */

      /* --- GPU --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__gpu_time__read,
         .name = "GPU Time Elapsed",
         .desc = "Time elapsed on the GPU during the measurement. Unit: ns.",
         .symbol_name = "GpuTime",
         .category = "GPU",
         .type = INTEL_PERF_COUNTER_TYPE_RAW,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_NS,
         .raw_max = 0 /* undefined */,
         .offset = 0,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__gpu_core_clocks__read,
         .name = "GPU Core Clocks",
         .desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         .symbol_name = "GpuCoreClocks",
         .category = "GPU",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_CYCLES,
         .raw_max = 0 /* undefined */,
         .offset = 8,
      };

      /* The only counter here whose maximum is computed at registration
       * time rather than being a literal. */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__avg_gpu_core_frequency__read,
         .name = "AVG GPU Core Frequency",
         .desc = "Average GPU Core Frequency in the measurement. Unit: Hz.",
         .symbol_name = "AvgGpuCoreFrequency",
         .category = "GPU",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_HZ,
         .raw_max = tglgt2__compute_basic__avg_gpu_core_frequency__max(perf),
         .offset = 16,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_float = tglgt2__compute_basic__gpu_busy__read,
         .name = "GPU Busy",
         .desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
         .symbol_name = "GpuBusy",
         .category = "GPU",
         .type = INTEL_PERF_COUNTER_TYPE_RAW,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         .units = INTEL_PERF_COUNTER_UNITS_PERCENT,
         .raw_max = 100.0,
         .offset = 24,
      };

      /* --- Thread dispatch, one counter per shader stage --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__vs_threads__read,
         .name = "VS Threads Dispatched",
         .desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "VsThreads",
         .category = "EU Array/Vertex Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 32,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__hs_threads__read,
         .name = "HS Threads Dispatched",
         .desc = "The total number of hull shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "HsThreads",
         .category = "EU Array/Hull Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 40,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__ds_threads__read,
         .name = "DS Threads Dispatched",
         .desc = "The total number of domain shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "DsThreads",
         .category = "EU Array/Domain Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 48,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__gs_threads__read,
         .name = "GS Threads Dispatched",
         .desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "GsThreads",
         .category = "EU Array/Geometry Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 56,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__ps_threads__read,
         .name = "FS Threads Dispatched",
         .desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "PsThreads",
         .category = "EU Array/Fragment Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 64,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__cs_threads__read,
         .name = "CS Threads Dispatched",
         .desc = "The total number of compute shader hardware threads dispatched. Unit: threads.",
         .symbol_name = "CsThreads",
         .category = "EU Array/Compute Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_THREADS,
         .raw_max = 0 /* undefined */,
         .offset = 72,
      };

      /* --- EU array percentages: float results, packed 4 bytes apart --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_float = tglgt2__compute_basic__eu_active__read,
         .name = "EU Active",
         .desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
         .symbol_name = "EuActive",
         .category = "EU Array",
         .type = INTEL_PERF_COUNTER_TYPE_RAW,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         .units = INTEL_PERF_COUNTER_UNITS_PERCENT,
         .raw_max = 100.0,
         .offset = 80,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_float = tglgt2__compute_basic__eu_stall__read,
         .name = "EU Stall",
         .desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.",
         .symbol_name = "EuStall",
         .category = "EU Array",
         .type = INTEL_PERF_COUNTER_TYPE_RAW,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         .units = INTEL_PERF_COUNTER_UNITS_PERCENT,
         .raw_max = 100.0,
         .offset = 84,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_float = tglgt2__compute_basic__eu_thread_occupancy__read,
         .name = "EU Thread Occupancy",
         .desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.",
         .symbol_name = "EuThreadOccupancy",
         .category = "EU Array",
         .type = INTEL_PERF_COUNTER_TYPE_RAW,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         .units = INTEL_PERF_COUNTER_UNITS_PERCENT,
         .raw_max = 100.0,
         .offset = 88,
      };

      /* --- 3D pipe: rasterizer, fragment shader, output merger --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__rasterized_pixels__read,
         .name = "Rasterized Pixels",
         .desc = "The total number of rasterized pixels. Unit: pixels.",
         .symbol_name = "RasterizedPixels",
         .category = "3D Pipe/Rasterizer",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 96,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__hi_depth_test_fails__read,
         .name = "Early Hi-Depth Test Fails",
         .desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         .symbol_name = "HiDepthTestFails",
         .category = "3D Pipe/Rasterizer/Hi-Depth Test",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 104,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__early_depth_test_fails__read,
         .name = "Early Depth Test Fails",
         .desc = "The total number of pixels dropped on early depth test. Unit: pixels.",
         .symbol_name = "EarlyDepthTestFails",
         .category = "3D Pipe/Rasterizer/Early Depth Test",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 112,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__samples_killed_in_ps__read,
         .name = "Samples Killed in FS",
         .desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         .symbol_name = "SamplesKilledInPs",
         .category = "3D Pipe/Fragment Shader",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 120,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__pixels_failing_post_ps_tests__read,
         .name = "Pixels Failing Tests",
         .desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         .symbol_name = "PixelsFailingPostPsTests",
         .category = "3D Pipe/Output Merger",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 128,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__samples_written__read,
         .name = "Samples Written",
         .desc = "The total number of samples or pixels written to all render targets. Unit: pixels.",
         .symbol_name = "SamplesWritten",
         .category = "3D Pipe/Output Merger",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 136,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__samples_blended__read,
         .name = "Samples Blended",
         .desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         .symbol_name = "SamplesBlended",
         .category = "3D Pipe/Output Merger",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_PIXELS,
         .raw_max = 0 /* undefined */,
         .offset = 144,
      };

      /* --- Sampler --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__sampler_texels__read,
         .name = "Sampler Texels",
         .desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         .symbol_name = "SamplerTexels",
         .category = "Sampler/Sampler Input",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_TEXELS,
         .raw_max = 0 /* undefined */,
         .offset = 152,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__sampler_texel_misses__read,
         .name = "Sampler Texels Misses",
         .desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         .symbol_name = "SamplerTexelMisses",
         .category = "Sampler/Sampler Cache",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_TEXELS,
         .raw_max = 0 /* undefined */,
         .offset = 160,
      };

      /* --- L3 / data port --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__slm_bytes_read__read,
         .name = "SLM Bytes Read",
         .desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         .symbol_name = "SlmBytesRead",
         .category = "L3/Data Port/SLM",
         .type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_BYTES,
         .raw_max = 0 /* unsupported (varies over time) */,
         .offset = 168,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__slm_bytes_written__read,
         .name = "SLM Bytes Written",
         .desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         .symbol_name = "SlmBytesWritten",
         .category = "L3/Data Port/SLM",
         .type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_BYTES,
         .raw_max = 0 /* unsupported (varies over time) */,
         .offset = 176,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__shader_memory_accesses__read,
         .name = "Shader Memory Accesses",
         .desc = "The total number of shader memory accesses to L3. Unit: messages.",
         .symbol_name = "ShaderMemoryAccesses",
         .category = "L3/Data Port",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_MESSAGES,
         .raw_max = 0 /* undefined */,
         .offset = 184,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__shader_atomics__read,
         .name = "Shader Atomic Memory Accesses",
         .desc = "The total number of shader atomic memory accesses. Unit: messages.",
         .symbol_name = "ShaderAtomics",
         .category = "L3/Data Port/Atomics",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_MESSAGES,
         .raw_max = 0 /* undefined */,
         .offset = 192,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__l3_shader_throughput__read,
         .name = "L3 Shader Throughput",
         .desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         .symbol_name = "L3ShaderThroughput",
         .category = "L3/Data Port",
         .type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_BYTES,
         .raw_max = 0 /* unsupported (varies over time) */,
         .offset = 200,
      };

      /* --- Barriers --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__shader_barriers__read,
         .name = "Shader Barrier Messages",
         .desc = "The total number of shader barrier messages. Unit: messages.",
         .symbol_name = "ShaderBarriers",
         .category = "EU Array/Barrier",
         .type = INTEL_PERF_COUNTER_TYPE_EVENT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_MESSAGES,
         .raw_max = 0 /* undefined */,
         .offset = 208,
      };

      /* --- GTI --- */
      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__gti_read_throughput__read,
         .name = "GTI Read Throughput",
         .desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.",
         .symbol_name = "GtiReadThroughput",
         .category = "GTI",
         .type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_BYTES,
         .raw_max = 0 /* unsupported (varies over time) */,
         .offset = 216,
      };

      slot = &query->counters[query->n_counters++];
      *slot = (struct intel_perf_query_counter) {
         .oa_counter_read_uint64 = tglgt2__compute_basic__gti_write_throughput__read,
         .name = "GTI Write Throughput",
         .desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.",
         .symbol_name = "GtiWriteThroughput",
         .category = "GTI",
         .type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         .data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         .units = INTEL_PERF_COUNTER_UNITS_BYTES,
         .raw_max = 0 /* unsupported (varies over time) */,
         .offset = 224,
      };

      /* The result buffer ends right after the last counter registered. */
      query->data_size = slot->offset + intel_perf_query_counter_get_size(slot);
   }

   /* Publish the query under its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "684ed715-a0ca-499b-89e0-25d1cdf0c737";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x00123E00 },
         { .reg = 0x00009888, .val = 0x060B00B3 },
         { .reg = 0x00009888, .val = 0x140B3C00 },
         { .reg = 0x00009888, .val = 0x1C0B0000 },
         { .reg = 0x00009888, .val = 0x120C8320 },
         { .reg = 0x00009888, .val = 0x040DBE00 },
         { .reg = 0x00009888, .val = 0x000D0000 },
         { .reg = 0x00009888, .val = 0x280D0000 },
         { .reg = 0x00009888, .val = 0x2C0E7C00 },
         { .reg = 0x00009888, .val = 0x10087C00 },
         { .reg = 0x00009888, .val = 0x1E120002 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x1E130002 },
         { .reg = 0x00009888, .val = 0x0E0B0031 },
         { .reg = 0x00009888, .val = 0x180B0092 },
         { .reg = 0x00009888, .val = 0x1A0B00B1 },
         { .reg = 0x00009888, .val = 0x020B0093 },
         { .reg = 0x00009888, .val = 0x040B0033 },
         { .reg = 0x00009888, .val = 0x000B0000 },
         { .reg = 0x00009888, .val = 0x0A0C0022 },
         { .reg = 0x00009888, .val = 0x1E0C0030 },
         { .reg = 0x00009888, .val = 0x1C0C8000 },
         { .reg = 0x00009888, .val = 0x140C8000 },
         { .reg = 0x00009888, .val = 0x160C8000 },
         { .reg = 0x00009888, .val = 0x100DC017 },
         { .reg = 0x00009888, .val = 0x160D0013 },
         { .reg = 0x00009888, .val = 0x1C0D0081 },
         { .reg = 0x00009888, .val = 0x080D0082 },
         { .reg = 0x00009888, .val = 0x0A0D8102 },
         { .reg = 0x00009888, .val = 0x140D0000 },
         { .reg = 0x00009888, .val = 0x0C0D0000 },
         { .reg = 0x00009888, .val = 0x0E0D4000 },
         { .reg = 0x00009888, .val = 0x120D0000 },
         { .reg = 0x00009888, .val = 0x060DC000 },
         { .reg = 0x00009888, .val = 0x0C0EC1C5 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x100E4000 },
         { .reg = 0x00009888, .val = 0x120EC000 },
         { .reg = 0x00009888, .val = 0x140EC000 },
         { .reg = 0x00009888, .val = 0x160E4000 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0EC000 },
         { .reg = 0x00009888, .val = 0x1C0F5555 },
         { .reg = 0x00009888, .val = 0x1E0F0554 },
         { .reg = 0x00009888, .val = 0x0E104000 },
         { .reg = 0x00009888, .val = 0x10104000 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x18104000 },
         { .reg = 0x00009888, .val = 0x1A104000 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x02104000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x08104000 },
         { .reg = 0x00009888, .val = 0x0A104000 },
         { .reg = 0x00009888, .val = 0x0C104000 },
         { .reg = 0x00009888, .val = 0x0E024000 },
         { .reg = 0x00009888, .val = 0x10024000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020055 },
         { .reg = 0x00009888, .val = 0x02024000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x06024000 },
         { .reg = 0x00009888, .val = 0x08024000 },
         { .reg = 0x00009888, .val = 0x0A024000 },
         { .reg = 0x00009888, .val = 0x0C024000 },
         { .reg = 0x00009888, .val = 0x1A032000 },
         { .reg = 0x00009888, .val = 0x1C032000 },
         { .reg = 0x00009888, .val = 0x2A035500 },
         { .reg = 0x00009888, .val = 0x2C030001 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x360036DB },
         { .reg = 0x00009888, .val = 0x380026DB },
         { .reg = 0x00009888, .val = 0x1A006000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x0E0A8000 },
         { .reg = 0x00009888, .val = 0x100A8000 },
         { .reg = 0x00009888, .val = 0x120A4000 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009888, .val = 0x08081000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x4D100604 },
         { .reg = 0x00009888, .val = 0x4F103400 },
         { .reg = 0x00009888, .val = 0x51100200 },
         { .reg = 0x00009888, .val = 0x53100004 },
         { .reg = 0x00009888, .val = 0x55101400 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100005 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100400 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "3a4c7510-7725-4bf8-9eae-59115a2431c6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 35);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14112400 },
         { .reg = 0x00009888, .val = 0x14312400 },
         { .reg = 0x00009888, .val = 0x14512474 },
         { .reg = 0x00009888, .val = 0x14712400 },
         { .reg = 0x00009888, .val = 0x14912400 },
         { .reg = 0x00009888, .val = 0x14B12400 },
         { .reg = 0x00009888, .val = 0x240A0019 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x10110074 },
         { .reg = 0x00009888, .val = 0x08110000 },
         { .reg = 0x00009888, .val = 0x10128000 },
         { .reg = 0x00009888, .val = 0x1C130400 },
         { .reg = 0x00009888, .val = 0x12310074 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x08310000 },
         { .reg = 0x00009888, .val = 0x12328000 },
         { .reg = 0x00009888, .val = 0x1C330800 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x0A510000 },
         { .reg = 0x00009888, .val = 0x14528000 },
         { .reg = 0x00009888, .val = 0x1C531000 },
         { .reg = 0x00009888, .val = 0x16710074 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0A710000 },
         { .reg = 0x00009888, .val = 0x16728000 },
         { .reg = 0x00009888, .val = 0x1C732000 },
         { .reg = 0x00009888, .val = 0x0E910074 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x06910000 },
         { .reg = 0x00009888, .val = 0x0E928000 },
         { .reg = 0x00009888, .val = 0x1C930200 },
         { .reg = 0x00009888, .val = 0x00B10074 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1E0F0020 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A030600 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003488 },
         { .reg = 0x00009888, .val = 0x3800001B },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x24050038 },
         { .reg = 0x00009888, .val = 0x24060080 },
         { .reg = 0x00009888, .val = 0x180A00F7 },
         { .reg = 0x00009888, .val = 0x200A0000 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x5D100055 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D140007 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x61110055 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F101116 },
         { .reg = 0x00009888, .val = 0x51100401 },
         { .reg = 0x00009888, .val = 0x53100030 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x70800000 },
         { .reg = 0x0000DC40, .val = 0x007F0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00000002 },
         { .reg = 0x0000D964, .val = 0x0000FFEF },
         { .reg = 0x0000DC20, .val = 0x00000002 },
         { .reg = 0x0000DC24, .val = 0x0000FFEF },
         { .reg = 0x0000D968, .val = 0x00000002 },
         { .reg = 0x0000D96C, .val = 0x0000FFDF },
         { .reg = 0x0000DC28, .val = 0x00000002 },
         { .reg = 0x0000DC2C, .val = 0x0000FFDF },
         { .reg = 0x0000D970, .val = 0x00000002 },
         { .reg = 0x0000D974, .val = 0x0000FFBF },
         { .reg = 0x0000DC30, .val = 0x00000002 },
         { .reg = 0x0000DC34, .val = 0x0000FFBF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__hdc_and_sf__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 224;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3) Unit: percent.";
         counter->symbol_name = "NonSamplerShader03AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 228;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice4) Unit: percent.";
         counter->symbol_name = "NonSamplerShader04AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 232;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice5) Unit: percent.";
         counter->symbol_name = "NonSamplerShader05AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 236;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Describe the tglgt2 "RasterizerAndPixelBackend" OA metric set and add it
 * to perf->oa_metrics_table, keyed by the query GUID.
 *
 * The query description is allocated with `perf` as its ralloc parent and
 * its counter array (41 slots) with the query as parent.  Counters are
 * appended in a fixed order; the two slice0 rasterizer counters are only
 * appended when bit 0 of perf->sys_vars.slice_mask is set.
 */
static void
tglgt2_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   /* Mux register programming for this metric set. */
   static const struct intel_perf_query_register_prog mux_table[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x1E075000 },
      { .reg = 0x00009888, .val = 0x1A0700C0 },
      { .reg = 0x00009888, .val = 0x1E055000 },
      { .reg = 0x00009888, .val = 0x1A0500C0 },
      { .reg = 0x00009888, .val = 0x1E065000 },
      { .reg = 0x00009888, .val = 0x1A0600C0 },
      { .reg = 0x00009888, .val = 0x2A0A7300 },
      { .reg = 0x00009888, .val = 0x2C0A0000 },
      { .reg = 0x00009888, .val = 0x120800A0 },
      { .reg = 0x00009888, .val = 0x0A07C000 },
      { .reg = 0x00009888, .val = 0x0E070027 },
      { .reg = 0x00009888, .val = 0x10070000 },
      { .reg = 0x00009888, .val = 0x24070000 },
      { .reg = 0x00009888, .val = 0x2A032000 },
      { .reg = 0x00009888, .val = 0x2C030008 },
      { .reg = 0x00009888, .val = 0x18006000 },
      { .reg = 0x00009888, .val = 0x360036D8 },
      { .reg = 0x00009888, .val = 0x38003299 },
      { .reg = 0x00009888, .val = 0x1A004000 },
      { .reg = 0x00009888, .val = 0x1C006000 },
      { .reg = 0x00009888, .val = 0x2A010400 },
      { .reg = 0x00009888, .val = 0x2C010001 },
      { .reg = 0x00009888, .val = 0x0C05C000 },
      { .reg = 0x00009888, .val = 0x00052700 },
      { .reg = 0x00009888, .val = 0x10050000 },
      { .reg = 0x00009888, .val = 0x24050000 },
      { .reg = 0x00009888, .val = 0x22050000 },
      { .reg = 0x00009888, .val = 0x0C0600C0 },
      { .reg = 0x00009888, .val = 0x0E062700 },
      { .reg = 0x00009888, .val = 0x10060000 },
      { .reg = 0x00009888, .val = 0x24060000 },
      { .reg = 0x00009888, .val = 0x26060000 },
      { .reg = 0x00009888, .val = 0x000A0144 },
      { .reg = 0x00009888, .val = 0x0E0A0145 },
      { .reg = 0x00009888, .val = 0x100A0156 },
      { .reg = 0x00009888, .val = 0x040A014F },
      { .reg = 0x00009888, .val = 0x200A0000 },
      { .reg = 0x00009888, .val = 0x120A4000 },
      { .reg = 0x00009888, .val = 0x140A4000 },
      { .reg = 0x00009888, .val = 0x180A8000 },
      { .reg = 0x00009888, .val = 0x1E0A8000 },
      { .reg = 0x00009888, .val = 0x08081980 },
      { .reg = 0x00009888, .val = 0x0A080032 },
      { .reg = 0x00009888, .val = 0x10080000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x31152800 },
      { .reg = 0x00009888, .val = 0x331500A0 },
      { .reg = 0x00009888, .val = 0x31352800 },
      { .reg = 0x00009888, .val = 0x333500A0 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B105FA5 },
      { .reg = 0x00009888, .val = 0x5D101555 },
      { .reg = 0x00009888, .val = 0x11148000 },
      { .reg = 0x00009888, .val = 0x1B14E000 },
      { .reg = 0x00009888, .val = 0x1D14003F },
      { .reg = 0x00009888, .val = 0x13148000 },
      { .reg = 0x00009888, .val = 0x15148000 },
      { .reg = 0x00009888, .val = 0x4B112000 },
      { .reg = 0x00009888, .val = 0x5F115005 },
      { .reg = 0x00009888, .val = 0x61111555 },
      { .reg = 0x00009888, .val = 0x01128000 },
      { .reg = 0x00009888, .val = 0x0F128000 },
      { .reg = 0x00009888, .val = 0x11128000 },
      { .reg = 0x00009888, .val = 0x13128000 },
      { .reg = 0x00009888, .val = 0x15128000 },
      { .reg = 0x00009888, .val = 0x17128000 },
      { .reg = 0x00009888, .val = 0x19128000 },
      { .reg = 0x00009888, .val = 0x1B128000 },
      { .reg = 0x00009888, .val = 0x1D128000 },
      { .reg = 0x00009888, .val = 0x1F128000 },
      { .reg = 0x00009888, .val = 0x03128000 },
      { .reg = 0x00009888, .val = 0x05128000 },
      { .reg = 0x00009888, .val = 0x07150016 },
      { .reg = 0x00009888, .val = 0x09150096 },
      { .reg = 0x00009888, .val = 0x01150000 },
      { .reg = 0x00009888, .val = 0x03168000 },
      { .reg = 0x00009888, .val = 0x05164000 },
      { .reg = 0x00009888, .val = 0x0B350016 },
      { .reg = 0x00009888, .val = 0x0D350096 },
      { .reg = 0x00009888, .val = 0x01350000 },
      { .reg = 0x00009888, .val = 0x05368000 },
      { .reg = 0x00009888, .val = 0x07364000 },
      { .reg = 0x00009888, .val = 0x47100400 },
      { .reg = 0x00009888, .val = 0x4D100616 },
      { .reg = 0x00009888, .val = 0x4F100404 },
      { .reg = 0x00009888, .val = 0x51100202 },
      { .reg = 0x00009888, .val = 0x53100002 },
      { .reg = 0x00009888, .val = 0x55100204 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009888, .val = 0x57100000 },
      { .reg = 0x00009888, .val = 0x49101404 },
      { .reg = 0x00009888, .val = 0x4B101616 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   /* B-counter register programming for this metric set. */
   static const struct intel_perf_query_register_prog b_counter_table[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x30800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00030000 },
      { .reg = 0x0000D940, .val = 0x00000038 },
      { .reg = 0x0000D944, .val = 0x0000FFF8 },
      { .reg = 0x0000DC00, .val = 0x00000038 },
      { .reg = 0x0000DC04, .val = 0x0000FFF8 },
      { .reg = 0x0000D948, .val = 0x000000C0 },
      { .reg = 0x0000D94C, .val = 0x0000FFE7 },
      { .reg = 0x0000DC08, .val = 0x000000C0 },
      { .reg = 0x0000DC0C, .val = 0x0000FFE7 },
   };

   /* Flex register programming for this metric set. */
   static const struct intel_perf_query_register_prog flex_table[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "6f02479c-e9ca-4c2b-b1e6-216a9e1c5ef7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Layout of the accumulation buffer: each section starts where the
    * previous one ends. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* NOTE(review): the query was allocated just above, so data_size is
    * presumably still zero here (rzalloc is expected to zero-fill) and this
    * branch is taken on every call -- confirm before relying on the guard. */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_table;
      query->config.n_mux_regs = ARRAY_SIZE(mux_table);

      query->config.b_counter_regs = b_counter_table;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_table);

      query->config.flex_regs = flex_table;
      query->config.n_flex_regs = ARRAY_SIZE(flex_table);

      /* ---- GPU-wide counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__gpu_time__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__gpu_core_clocks__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter here whose maximum is computed from `perf`. */
      ctr->raw_max = tglgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->symbol_name = "GpuBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__gpu_busy__read;

      /* ---- Per-stage thread dispatch counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "VsThreads";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__vs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "HsThreads";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__hs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "DsThreads";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__ds_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "GsThreads";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__gs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "PsThreads";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__ps_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "CsThreads";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__cs_threads__read;

      /* ---- EU array utilisation (float percentages, 4-byte offset steps) ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->symbol_name = "EuActive";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__eu_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->symbol_name = "EuStall";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__eu_stall__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__eu_thread_occupancy__read;

      /* ---- Pixel / sample event counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Rasterized Pixels";
      ctr->desc = "The total number of rasterized pixels. Unit: pixels.";
      ctr->symbol_name = "RasterizedPixels";
      ctr->category = "3D Pipe/Rasterizer";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 96;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__rasterized_pixels__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Hi-Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      ctr->symbol_name = "HiDepthTestFails";
      ctr->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 104;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__hi_depth_test_fails__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Early Depth Test Fails";
      ctr->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      ctr->symbol_name = "EarlyDepthTestFails";
      ctr->category = "3D Pipe/Rasterizer/Early Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 112;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__early_depth_test_fails__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Killed in FS";
      ctr->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      ctr->symbol_name = "SamplesKilledInPs";
      ctr->category = "3D Pipe/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 120;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__samples_killed_in_ps__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Pixels Failing Tests";
      ctr->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      ctr->symbol_name = "PixelsFailingPostPsTests";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 128;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Written";
      ctr->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      ctr->symbol_name = "SamplesWritten";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 136;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__samples_written__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Samples Blended";
      ctr->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      ctr->symbol_name = "SamplesBlended";
      ctr->category = "3D Pipe/Output Merger";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 144;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__samples_blended__read;

      /* ---- Sampler counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels";
      ctr->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      ctr->symbol_name = "SamplerTexels";
      ctr->category = "Sampler/Sampler Input";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 152;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__sampler_texels__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Sampler Texels Misses";
      ctr->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      ctr->symbol_name = "SamplerTexelMisses";
      ctr->category = "Sampler/Sampler Cache";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 160;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__sampler_texel_misses__read;

      /* ---- L3 / data port counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Read";
      ctr->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      ctr->symbol_name = "SlmBytesRead";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 168;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__slm_bytes_read__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SLM Bytes Written";
      ctr->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      ctr->symbol_name = "SlmBytesWritten";
      ctr->category = "L3/Data Port/SLM";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 176;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__slm_bytes_written__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Memory Accesses";
      ctr->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      ctr->symbol_name = "ShaderMemoryAccesses";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 184;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__shader_memory_accesses__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Atomic Memory Accesses";
      ctr->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      ctr->symbol_name = "ShaderAtomics";
      ctr->category = "L3/Data Port/Atomics";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 192;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__shader_atomics__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "L3 Shader Throughput";
      ctr->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      ctr->symbol_name = "L3ShaderThroughput";
      ctr->category = "L3/Data Port";
      ctr->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      ctr->raw_max = 0 /* unsupported (varies over time) */;
      ctr->offset = 200;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__l3_shader_throughput__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Shader Barrier Messages";
      ctr->desc = "The total number of shader barrier messages. Unit: messages.";
      ctr->symbol_name = "ShaderBarriers";
      ctr->category = "EU Array/Barrier";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 208;
      ctr->oa_counter_read_uint64 = tglgt2__rasterizer_and_pixel_backend__shader_barriers__read;

      /* ---- Slice0 rasterizer counters: only appended when bit 0 of the
       * slice mask is set.  Their offsets (216, 220) stay reserved either
       * way, since later counters use fixed offsets. ---- */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Rasterizer Input Available";
         ctr->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         ctr->symbol_name = "Rasterizer0InputAvailable";
         ctr->category = "GPU/Rasterizer";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 216;
         ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__rasterizer0_input_available__read;

         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 Rasterizer Output Ready";
         ctr->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         ctr->symbol_name = "Rasterizer0OutputReady";
         ctr->category = "GPU/Rasterizer";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 220;
         ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
      }

      /* ---- Pixel backend counters (always appended) ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      ctr->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      ctr->symbol_name = "PixelData00Ready";
      ctr->category = "GPU/Rasterizer/Early Depth Test";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 224;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__pixel_data00_ready__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe0 PS Output Available";
      ctr->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      ctr->symbol_name = "PSOutput00Available";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 228;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__ps_output00_available__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe1 PS Output Available";
      ctr->desc = "The percentage of time in which slice0 pipe1 PS output is available Unit: percent.";
      ctr->symbol_name = "PSOutput01Available";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 232;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__ps_output01_available__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe2 PS Output Available";
      ctr->desc = "The percentage of time in which slice0 pipe2 PS output is available Unit: percent.";
      ctr->symbol_name = "PSOutput02Available";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 236;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__ps_output02_available__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe0 Pixel Values Ready";
      ctr->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      ctr->symbol_name = "PixelValues00Ready";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 240;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__pixel_values00_ready__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe1 Pixel Values Ready";
      ctr->desc = "The percentage of time in which slice0 pipe1 pixel values are ready Unit: percent.";
      ctr->symbol_name = "PixelValues01Ready";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 244;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__pixel_values01_ready__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "Slice0 Pipe2 Pixel Values Ready";
      ctr->desc = "The percentage of time in which slice0 pipe2 pixel values are ready Unit: percent.";
      ctr->symbol_name = "PixelValues02Ready";
      ctr->category = "GPU/3D Pipe";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 248;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__pixel_values02_ready__read;

      /* ---- GTI request-queue counters ---- */

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SQ00 is full";
      ctr->desc = "The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->symbol_name = "GTRequestQueue00Full";
      ctr->category = "GTI";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 252;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__gt_request_queue00_full__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SQ01 is full";
      ctr->desc = "The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->symbol_name = "GTRequestQueue01Full";
      ctr->category = "GTI";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 256;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__gt_request_queue01_full__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SQ10 is full";
      ctr->desc = "The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->symbol_name = "GTRequestQueue10Full";
      ctr->category = "GTI";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 260;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__gt_request_queue10_full__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "SQ11 is full";
      ctr->desc = "The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      ctr->symbol_name = "GTRequestQueue11Full";
      ctr->category = "GTI";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 264;
      ctr->oa_counter_read_float = tglgt2__rasterizer_and_pixel_backend__gt_request_queue11_full__read;

      /* The result buffer ends right after the last counter appended above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_1";
   query->symbol_name = "L3_1";
   query->guid = "7e809cb4-6e90-44cc-9c57-6eff58ad360a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04022000 },
         { .reg = 0x00009888, .val = 0x06022800 },
         { .reg = 0x00009888, .val = 0x04002827 },
         { .reg = 0x00009888, .val = 0x0600202C },
         { .reg = 0x00009888, .val = 0x00020024 },
         { .reg = 0x00009888, .val = 0x0E020025 },
         { .reg = 0x00009888, .val = 0x10020026 },
         { .reg = 0x00009888, .val = 0x12020027 },
         { .reg = 0x00009888, .val = 0x1402002C },
         { .reg = 0x00009888, .val = 0x1602002D },
         { .reg = 0x00009888, .val = 0x1802002E },
         { .reg = 0x00009888, .val = 0x1A02002F },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x2C020000 },
         { .reg = 0x00009888, .val = 0x00034000 },
         { .reg = 0x00009888, .val = 0x1A032000 },
         { .reg = 0x00009888, .val = 0x1C032000 },
         { .reg = 0x00009888, .val = 0x1E032000 },
         { .reg = 0x00009888, .val = 0x2A035500 },
         { .reg = 0x00009888, .val = 0x1C000024 },
         { .reg = 0x00009888, .val = 0x1E000025 },
         { .reg = 0x00009888, .val = 0x02000026 },
         { .reg = 0x00009888, .val = 0x0800002D },
         { .reg = 0x00009888, .val = 0x0A00002E },
         { .reg = 0x00009888, .val = 0x0C00002F },
         { .reg = 0x00009888, .val = 0x360036D8 },
         { .reg = 0x00009888, .val = 0x18006000 },
         { .reg = 0x00009888, .val = 0x380000DB },
         { .reg = 0x00009888, .val = 0x1A000000 },
         { .reg = 0x00009888, .val = 0x34000000 },
         { .reg = 0x00009888, .val = 0x000A8000 },
         { .reg = 0x00009888, .val = 0x0E0A8000 },
         { .reg = 0x00009888, .val = 0x100A8000 },
         { .reg = 0x00009888, .val = 0x120A8000 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100000 },
         { .reg = 0x00009888, .val = 0x55100600 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100006 },
         { .reg = 0x00009888, .val = 0x49100606 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_1__l30_bank0_input_available__read;
         counter->name = "Slice0 L3 Bank0 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank0 has input available Unit: percent.";
         counter->symbol_name = "L30Bank0InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_1__l30_bank1_input_available__read;
         counter->name = "Slice0 L3 Bank1 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank1 has input available Unit: percent.";
         counter->symbol_name = "L30Bank1InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_1__l30_bank4_input_available__read;
         counter->name = "Slice0 L3 Bank4 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank4 has input available Unit: percent.";
         counter->symbol_name = "L30Bank4InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_1__l30_bank5_input_available__read;
         counter->name = "Slice0 L3 Bank5 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank5 has input available Unit: percent.";
         counter->symbol_name = "L30Bank5InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_2";
   query->symbol_name = "L3_2";
   query->guid = "0dde1bb6-340f-4350-b398-2b0228573967";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04023027 },
         { .reg = 0x00009888, .val = 0x0602382C },
         { .reg = 0x00009888, .val = 0x04003000 },
         { .reg = 0x00009888, .val = 0x06003800 },
         { .reg = 0x00009888, .val = 0x1C020024 },
         { .reg = 0x00009888, .val = 0x1E020025 },
         { .reg = 0x00009888, .val = 0x02020026 },
         { .reg = 0x00009888, .val = 0x0802002D },
         { .reg = 0x00009888, .val = 0x0A02002E },
         { .reg = 0x00009888, .val = 0x0C02002F },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x00000024 },
         { .reg = 0x00009888, .val = 0x0E000025 },
         { .reg = 0x00009888, .val = 0x10000026 },
         { .reg = 0x00009888, .val = 0x12000027 },
         { .reg = 0x00009888, .val = 0x1400002C },
         { .reg = 0x00009888, .val = 0x1600002D },
         { .reg = 0x00009888, .val = 0x1800002E },
         { .reg = 0x00009888, .val = 0x1A00602F },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100606 },
         { .reg = 0x00009888, .val = 0x51100606 },
         { .reg = 0x00009888, .val = 0x53100606 },
         { .reg = 0x00009888, .val = 0x55100006 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_2__l30_bank2_input_available__read;
         counter->name = "Slice0 L3 Bank2 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank2 has input available Unit: percent.";
         counter->symbol_name = "L30Bank2InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_2__l30_bank3_input_available__read;
         counter->name = "Slice0 L3 Bank3 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank3 has input available Unit: percent.";
         counter->symbol_name = "L30Bank3InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_2__l30_bank6_input_available__read;
         counter->name = "Slice0 L3 Bank6 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank6 has input available Unit: percent.";
         counter->symbol_name = "L30Bank6InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_2__l30_bank7_input_available__read;
         counter->name = "Slice0 L3 Bank7 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank7 has input available Unit: percent.";
         counter->symbol_name = "L30Bank7InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_3";
   query->symbol_name = "L3_3";
   query->guid = "8ecaeff2-78f4-4e29-b331-d757e6a74ed0";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04020200 },
         { .reg = 0x00009888, .val = 0x06020020 },
         { .reg = 0x00009888, .val = 0x04000200 },
         { .reg = 0x00009888, .val = 0x06000000 },
         { .reg = 0x00009888, .val = 0x08020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A000020 },
         { .reg = 0x00009888, .val = 0x0C000028 },
         { .reg = 0x00009888, .val = 0x36000000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34000300 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110550 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_3__l30_bank0_output_ready__read;
         counter->name = "Slice0 L3 Bank0 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank0 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank0OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_3__l30_bank4_output_ready__read;
         counter->name = "Slice0 L3 Bank4 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank4 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank4OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_4";
   query->symbol_name = "L3_4";
   query->guid = "30cd8433-f679-401e-b578-19e22975e84f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04020A00 },
         { .reg = 0x00009888, .val = 0x06020800 },
         { .reg = 0x00009888, .val = 0x04000A00 },
         { .reg = 0x00009888, .val = 0x06000820 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110550 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_4__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_4__l30_bank1_output_ready__read;
         counter->name = "Slice0 L3 Bank1 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank1 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank1OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_4__l30_bank5_output_ready__read;
         counter->name = "Slice0 L3 Bank5 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank5 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank5OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_5";
   query->symbol_name = "L3_5";
   query->guid = "e0efab61-c904-4354-9fc5-35e8b8bc7d20";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021200 },
         { .reg = 0x00009888, .val = 0x06021000 },
         { .reg = 0x00009888, .val = 0x04001200 },
         { .reg = 0x00009888, .val = 0x06001020 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110550 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_5__l30_bank2_output_ready__read;
         counter->name = "Slice0 L3 Bank2 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank2 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank2OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_5__l30_bank6_output_ready__read;
         counter->name = "Slice0 L3 Bank6 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank6 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank6OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_l3_6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_6";
   query->symbol_name = "L3_6";
   query->guid = "4066ad45-4a68-4acf-86b2-fa5a6a914db7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021A00 },
         { .reg = 0x00009888, .val = 0x06021800 },
         { .reg = 0x00009888, .val = 0x04001A00 },
         { .reg = 0x00009888, .val = 0x06001820 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110550 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__l3_6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__l3_6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_6__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_6__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__l3_6__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_6__l30_bank3_output_ready__read;
         counter->name = "Slice0 L3 Bank3 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank3 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank3OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__l3_6__l30_bank7_output_ready__read;
         counter->name = "Slice0 L3 Bank7 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank7 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank7OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "17e2be13-39fe-45f0-867c-0f83fcc51654";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1C121600 },
         { .reg = 0x00009888, .val = 0x18141600 },
         { .reg = 0x00009888, .val = 0x1C325600 },
         { .reg = 0x00009888, .val = 0x18341600 },
         { .reg = 0x00009888, .val = 0x1C521600 },
         { .reg = 0x00009888, .val = 0x185416A6 },
         { .reg = 0x00009888, .val = 0x1C721600 },
         { .reg = 0x00009888, .val = 0x18741600 },
         { .reg = 0x00009888, .val = 0x1C921600 },
         { .reg = 0x00009888, .val = 0x18941600 },
         { .reg = 0x00009888, .val = 0x1CB21600 },
         { .reg = 0x00009888, .val = 0x18B41600 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x04120086 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x021400A6 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320086 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C3400A6 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520086 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720086 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x147400A6 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920086 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x109400A6 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20086 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B400A6 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003248 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115005 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x47101600 },
         { .reg = 0x00009888, .val = 0x4D101000 },
         { .reg = 0x00009888, .val = 0x4F101603 },
         { .reg = 0x00009888, .val = 0x51100003 },
         { .reg = 0x00009888, .val = 0x53101404 },
         { .reg = 0x00009888, .val = 0x55101111 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100005 },
         { .reg = 0x00009888, .val = 0x49100511 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 DualSubslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 DualSubslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler02_input_available__read;
         counter->name = "Slice0 DualSubslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler03_input_available__read;
         counter->name = "Slice0 DualSubslice3 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler03InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler04_input_available__read;
         counter->name = "Slice0 DualSubslice4 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler04InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_1__sampler05_input_available__read;
         counter->name = "Slice0 DualSubslice5 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler05InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_2";
   query->symbol_name = "Sampler_2";
   query->guid = "0c3c3235-2e91-4ef0-8562-4ea1501e8612";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0C123E00 },
         { .reg = 0x00009888, .val = 0x04143E00 },
         { .reg = 0x00009888, .val = 0x0C323E00 },
         { .reg = 0x00009888, .val = 0x04343E00 },
         { .reg = 0x00009888, .val = 0x0C523E00 },
         { .reg = 0x00009888, .val = 0x04543E00 },
         { .reg = 0x00009888, .val = 0x0C723E00 },
         { .reg = 0x00009888, .val = 0x04743E00 },
         { .reg = 0x00009888, .val = 0x0C923E00 },
         { .reg = 0x00009888, .val = 0x04943E00 },
         { .reg = 0x00009888, .val = 0x0CB23E00 },
         { .reg = 0x00009888, .val = 0x04B43E00 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x04120033 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x02140013 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320033 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1C324000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C340013 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520033 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x18540013 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720033 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x14740013 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920033 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x10940013 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20033 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B40013 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003248 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115005 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x47101600 },
         { .reg = 0x00009888, .val = 0x4D101000 },
         { .reg = 0x00009888, .val = 0x4F101603 },
         { .reg = 0x00009888, .val = 0x51100003 },
         { .reg = 0x00009888, .val = 0x53101404 },
         { .reg = 0x00009888, .val = 0x55101111 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100005 },
         { .reg = 0x00009888, .val = 0x49100511 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__sampler_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 DualSubslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 DualSubslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 DualSubslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler03_output_ready__read;
         counter->name = "Slice0 DualSubslice3 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler03OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler04_output_ready__read;
         counter->name = "Slice0 DualSubslice4 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler04OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__sampler_2__sampler05_output_ready__read;
         counter->name = "Slice0 DualSubslice5 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler05OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "ee6f5fa3-13a8-4842-8b34-f7541a0f76a3";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2611001C },
         { .reg = 0x00009888, .val = 0x2631001C },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x16110103 },
         { .reg = 0x00009888, .val = 0x1C110104 },
         { .reg = 0x00009888, .val = 0x1E110105 },
         { .reg = 0x00009888, .val = 0x02110106 },
         { .reg = 0x00009888, .val = 0x04110107 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x0A110000 },
         { .reg = 0x00009888, .val = 0x0E110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x16128000 },
         { .reg = 0x00009888, .val = 0x1C128000 },
         { .reg = 0x00009888, .val = 0x1E128000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x1C132000 },
         { .reg = 0x00009888, .val = 0x1E130003 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x14310103 },
         { .reg = 0x00009888, .val = 0x06310104 },
         { .reg = 0x00009888, .val = 0x08310105 },
         { .reg = 0x00009888, .val = 0x0A310106 },
         { .reg = 0x00009888, .val = 0x0C310107 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x02310000 },
         { .reg = 0x00009888, .val = 0x04310000 },
         { .reg = 0x00009888, .val = 0x14328000 },
         { .reg = 0x00009888, .val = 0x06328000 },
         { .reg = 0x00009888, .val = 0x08328000 },
         { .reg = 0x00009888, .val = 0x0A328000 },
         { .reg = 0x00009888, .val = 0x0C328000 },
         { .reg = 0x00009888, .val = 0x1C331100 },
         { .reg = 0x00009888, .val = 0x16338000 },
         { .reg = 0x00009888, .val = 0x18338000 },
         { .reg = 0x00009888, .val = 0x1A338000 },
         { .reg = 0x00009888, .val = 0x12510103 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x08510000 },
         { .reg = 0x00009888, .val = 0x12528000 },
         { .reg = 0x00009888, .val = 0x1C530800 },
         { .reg = 0x00009888, .val = 0x10710103 },
         { .reg = 0x00009888, .val = 0x08710000 },
         { .reg = 0x00009888, .val = 0x10728000 },
         { .reg = 0x00009888, .val = 0x1C730400 },
         { .reg = 0x00009888, .val = 0x0E910103 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x06910000 },
         { .reg = 0x00009888, .val = 0x0E928000 },
         { .reg = 0x00009888, .val = 0x1C930200 },
         { .reg = 0x00009888, .val = 0x00B10103 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1C0F8000 },
         { .reg = 0x00009888, .val = 0x10104000 },
         { .reg = 0x00009888, .val = 0x10024000 },
         { .reg = 0x00009888, .val = 0x1C032000 },
         { .reg = 0x00009888, .val = 0x1E034000 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x360026CA },
         { .reg = 0x00009888, .val = 0x38002402 },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x1E004000 },
         { .reg = 0x00009888, .val = 0x34001200 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x240582C0 },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x2205FFA0 },
         { .reg = 0x00009888, .val = 0x24060020 },
         { .reg = 0x00009888, .val = 0x100A8000 },
         { .reg = 0x00009888, .val = 0x120A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101415 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D140033 },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111415 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47101600 },
         { .reg = 0x00009888, .val = 0x4D100111 },
         { .reg = 0x00009888, .val = 0x4F100006 },
         { .reg = 0x00009888, .val = 0x51101114 },
         { .reg = 0x00009888, .val = 0x53100001 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101110 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x49101111 },
         { .reg = 0x00009888, .val = 0x4B101111 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00000002 },
         { .reg = 0x0000D964, .val = 0x0000FFEF },
         { .reg = 0x0000DC20, .val = 0x00000002 },
         { .reg = 0x0000DC24, .val = 0x0000FFEF },
         { .reg = 0x0000D968, .val = 0x00000002 },
         { .reg = 0x0000D96C, .val = 0x0000FFDF },
         { .reg = 0x0000DC28, .val = 0x00000002 },
         { .reg = 0x0000DC2C, .val = 0x0000FFDF },
         { .reg = 0x0000D970, .val = 0x00007800 },
         { .reg = 0x0000D974, .val = 0x0000F0FF },
         { .reg = 0x0000DC30, .val = 0x00007800 },
         { .reg = 0x0000DC34, .val = 0x0000F0FF },
         { .reg = 0x0000D978, .val = 0x00078000 },
         { .reg = 0x0000D97C, .val = 0x00000FFF },
         { .reg = 0x0000DC38, .val = 0x00078000 },
         { .reg = 0x0000DC3C, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread03_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread04_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__non_ps_thread05_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header00_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header00_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header01_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header01_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header00_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader00Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_1__thread_header01_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader01Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 152;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "f1577929-9215-488b-9899-d12b6e799743";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 24);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

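   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function 'query' is obtained from rzalloc() a few
    * lines above rather than from a global variable, so the data_size check
    * below presumably always passes for the freshly allocated query; confirm
    * before relying on the "initialized once" wording. */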

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x24110340 },
         { .reg = 0x00009888, .val = 0x24310340 },
         { .reg = 0x00009888, .val = 0x24510340 },
         { .reg = 0x00009888, .val = 0x24710340 },
         { .reg = 0x00009888, .val = 0x24910340 },
         { .reg = 0x00009888, .val = 0x24B10340 },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x2207FF00 },
         { .reg = 0x00009888, .val = 0x021100F3 },
         { .reg = 0x00009888, .val = 0x041100F2 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x1C3100F3 },
         { .reg = 0x00009888, .val = 0x1E3100F2 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x0E310000 },
         { .reg = 0x00009888, .val = 0x1C328000 },
         { .reg = 0x00009888, .val = 0x1E328000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x185100F3 },
         { .reg = 0x00009888, .val = 0x1A5100F2 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x0C510000 },
         { .reg = 0x00009888, .val = 0x18528000 },
         { .reg = 0x00009888, .val = 0x1A528000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x147100F3 },
         { .reg = 0x00009888, .val = 0x167100F2 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0A710000 },
         { .reg = 0x00009888, .val = 0x14728000 },
         { .reg = 0x00009888, .val = 0x16728000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x109100F3 },
         { .reg = 0x00009888, .val = 0x129100F2 },
         { .reg = 0x00009888, .val = 0x08910000 },
         { .reg = 0x00009888, .val = 0x10928000 },
         { .reg = 0x00009888, .val = 0x12928000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x00B100F3 },
         { .reg = 0x00009888, .val = 0x0EB100F2 },
         { .reg = 0x00009888, .val = 0x06B10104 },
         { .reg = 0x00009888, .val = 0x08B10105 },
         { .reg = 0x00009888, .val = 0x0AB10106 },
         { .reg = 0x00009888, .val = 0x0CB10107 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x02B10000 },
         { .reg = 0x00009888, .val = 0x04B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x0EB28000 },
         { .reg = 0x00009888, .val = 0x06B28000 },
         { .reg = 0x00009888, .val = 0x08B28000 },
         { .reg = 0x00009888, .val = 0x0AB28000 },
         { .reg = 0x00009888, .val = 0x0CB28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30300 },
         { .reg = 0x00009888, .val = 0x16B38000 },
         { .reg = 0x00009888, .val = 0x18B38000 },
         { .reg = 0x00009888, .val = 0x1AB38000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003249 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x1E002000 },
         { .reg = 0x00009888, .val = 0x34000900 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47101600 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009888, .val = 0x4F100606 },
         { .reg = 0x00009888, .val = 0x51100006 },
         { .reg = 0x00009888, .val = 0x53100400 },
         { .reg = 0x00009888, .val = 0x55101114 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x49100111 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x70800000 },
         { .reg = 0x0000DC40, .val = 0x007F0000 },
         { .reg = 0x0000D940, .val = 0x00000000 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000000 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000000 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000000 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000000 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000000 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000000 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000000 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00000000 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00000000 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00000000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00000000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
         { .reg = 0x0000D970, .val = 0x00078000 },
         { .reg = 0x0000D974, .val = 0x00000FFF },
         { .reg = 0x0000DC30, .val = 0x00078000 },
         { .reg = 0x0000DC34, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread03_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread04_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__ps_thread05_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__thread_header05_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader05Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__thread_header05_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__thread_header05_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__thread_header05_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_2__thread_header05_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_tdl_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_3";
   query->symbol_name = "TDL_3";
   query->guid = "d5890d02-b2be-4742-a16e-17190a92a301";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 28);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's OK to fill in the query
    * description only once; the data_size check below guards that. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x24072A80 },
         { .reg = 0x00009888, .val = 0x06510107 },
         { .reg = 0x00009888, .val = 0x08510106 },
         { .reg = 0x00009888, .val = 0x0A510105 },
         { .reg = 0x00009888, .val = 0x0C510104 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x02510000 },
         { .reg = 0x00009888, .val = 0x04510000 },
         { .reg = 0x00009888, .val = 0x06528000 },
         { .reg = 0x00009888, .val = 0x08528000 },
         { .reg = 0x00009888, .val = 0x0A528000 },
         { .reg = 0x00009888, .val = 0x0C528000 },
         { .reg = 0x00009888, .val = 0x16538000 },
         { .reg = 0x00009888, .val = 0x18538000 },
         { .reg = 0x00009888, .val = 0x1A538000 },
         { .reg = 0x00009888, .val = 0x1C530100 },
         { .reg = 0x00009888, .val = 0x1C710107 },
         { .reg = 0x00009888, .val = 0x1E710106 },
         { .reg = 0x00009888, .val = 0x02710105 },
         { .reg = 0x00009888, .val = 0x04710104 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0E710000 },
         { .reg = 0x00009888, .val = 0x00710000 },
         { .reg = 0x00009888, .val = 0x1C728000 },
         { .reg = 0x00009888, .val = 0x1E728000 },
         { .reg = 0x00009888, .val = 0x02728000 },
         { .reg = 0x00009888, .val = 0x04728000 },
         { .reg = 0x00009888, .val = 0x1E730003 },
         { .reg = 0x00009888, .val = 0x12738000 },
         { .reg = 0x00009888, .val = 0x14738000 },
         { .reg = 0x00009888, .val = 0x14910107 },
         { .reg = 0x00009888, .val = 0x16910106 },
         { .reg = 0x00009888, .val = 0x18910105 },
         { .reg = 0x00009888, .val = 0x1A910104 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x0A910000 },
         { .reg = 0x00009888, .val = 0x0C910000 },
         { .reg = 0x00009888, .val = 0x14928000 },
         { .reg = 0x00009888, .val = 0x16928000 },
         { .reg = 0x00009888, .val = 0x18928000 },
         { .reg = 0x00009888, .val = 0x1A928000 },
         { .reg = 0x00009888, .val = 0x1C93F000 },
         { .reg = 0x00009888, .val = 0x1E0F2800 },
         { .reg = 0x00009888, .val = 0x1C0F000A },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x02104000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x02024000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06038000 },
         { .reg = 0x00009888, .val = 0x08038000 },
         { .reg = 0x00009888, .val = 0x0A038000 },
         { .reg = 0x00009888, .val = 0x18034000 },
         { .reg = 0x00009888, .val = 0x36001003 },
         { .reg = 0x00009888, .val = 0x38003649 },
         { .reg = 0x00009888, .val = 0x1A006000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2206AA00 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5D101554 },
         { .reg = 0x00009888, .val = 0x5B100555 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x61111554 },
         { .reg = 0x00009888, .val = 0x5F110555 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x51100600 },
         { .reg = 0x00009888, .val = 0x53100606 },
         { .reg = 0x00009888, .val = 0x55100006 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100404 },
         { .reg = 0x00009888, .val = 0x4D100404 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x70800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00070000 },
         { .reg = 0x0000D940, .val = 0x00078000 },
         { .reg = 0x0000D944, .val = 0x00000FFF },
         { .reg = 0x0000DC00, .val = 0x00078000 },
         { .reg = 0x0000DC04, .val = 0x00000FFF },
         { .reg = 0x0000D948, .val = 0x00007800 },
         { .reg = 0x0000D94C, .val = 0x0000F0FF },
         { .reg = 0x0000DC08, .val = 0x00007800 },
         { .reg = 0x0000DC0C, .val = 0x0000F0FF },
         { .reg = 0x0000D950, .val = 0x00000780 },
         { .reg = 0x0000D954, .val = 0x0000FF0F },
         { .reg = 0x0000DC10, .val = 0x00000780 },
         { .reg = 0x0000DC14, .val = 0x0000FF0F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__tdl_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__tdl_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__tdl_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header02_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader02Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header03_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader03Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header04_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader04Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header02_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header02_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header03_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header03_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header03_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header03_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header04_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header04_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header04_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = tglgt2__tdl_3__thread_header04_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the "GpuBusyness" OA metric set for tglgt2.
 *
 * Allocates a query description (rzalloc with \p perf as the first argument),
 * fills in its identity (name, symbol name, GUID), the OA report format and
 * the accumulator offsets, attaches the mux / B-counter / flex register
 * programming tables, appends the 22 counters of the set and finally inserts
 * the query into perf->oa_metrics_table keyed by its GUID.
 *
 * Counter result offsets are hard-coded per counter; judging by the values
 * below, UINT64 counters advance the offset by 8 bytes and FLOAT counters
 * by 4 bytes.
 *
 * NOTE: this file is generated by gen_perf.py; the description fix on the
 * ComputeBusy counter should also be carried into the generator's input,
 * otherwise it will be lost on regeneration.
 *
 * \param perf  perf configuration that receives the new query.
 */
static void
tglgt2_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "GpuBusyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "19fe64eb-ac4f-45c6-b2b9-af728b21753b";
   /* Capacity must match the number of counters appended below (22). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;   /* 1 */
   query->a_offset = query->gpu_clock_offset + 1;          /* 2 */
   query->b_offset = query->a_offset + 36;                 /* 38 */
   query->c_offset = query->b_offset + 8;                  /* 46 */
   query->perfcnt_offset = query->c_offset + 8;            /* 54 */
   query->rpstat_offset = query->perfcnt_offset + 2;       /* 56 */

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): query is freshly allocated just above, so data_size is
    * presumably still zero here (assuming rzalloc zero-fills) and this
    * branch is always taken -- confirm before relying on the guard. */
   if (!query->data_size) {
      /* Mux programming; entries are kept in their generated order. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x0E101200 },
         { .reg = 0x00009888, .val = 0x040E0043 },
         { .reg = 0x00009888, .val = 0x0A0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x0C0E0000 },
         { .reg = 0x00009888, .val = 0x1C0F0104 },
         { .reg = 0x00009888, .val = 0x08100053 },
         { .reg = 0x00009888, .val = 0x20100000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x0A104000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x08024000 },
         { .reg = 0x00009888, .val = 0x0A024000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x15102400 },
         { .reg = 0x00009888, .val = 0x230B0120 },
         { .reg = 0x00009888, .val = 0x15182400 },
         { .reg = 0x00009888, .val = 0x232B0120 },
         { .reg = 0x00009888, .val = 0x17100023 },
         { .reg = 0x00009888, .val = 0x11100000 },
         { .reg = 0x00009888, .val = 0x5D101000 },
         { .reg = 0x00009888, .val = 0x5B100545 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x1B140C00 },
         { .reg = 0x00009888, .val = 0x61112000 },
         { .reg = 0x00009888, .val = 0x5F110945 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x13138000 },
         { .reg = 0x00009888, .val = 0x1B0D0040 },
         { .reg = 0x00009888, .val = 0x1F0B00D3 },
         { .reg = 0x00009888, .val = 0x190B0000 },
         { .reg = 0x00009888, .val = 0x170B0000 },
         { .reg = 0x00009888, .val = 0x1B170002 },
         { .reg = 0x00009888, .val = 0x0D174000 },
         { .reg = 0x00009888, .val = 0x071800A3 },
         { .reg = 0x00009888, .val = 0x11180000 },
         { .reg = 0x00009888, .val = 0x032D4000 },
         { .reg = 0x00009888, .val = 0x032B00D3 },
         { .reg = 0x00009888, .val = 0x192B0000 },
         { .reg = 0x00009888, .val = 0x092B0000 },
         { .reg = 0x00009888, .val = 0x55103000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49107003 },
         { .reg = 0x00009888, .val = 0x4B100402 },
         { .reg = 0x00009888, .val = 0x4D100230 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00024002 },
         { .reg = 0x0000D944, .val = 0x0000B7FF },
         { .reg = 0x0000DC00, .val = 0x00024002 },
         { .reg = 0x0000DC04, .val = 0x0000B7FF },
         { .reg = 0x0000D948, .val = 0x0007F000 },
         { .reg = 0x0000D94C, .val = 0x000001FF },
         { .reg = 0x0000DC08, .val = 0x0007F000 },
         { .reg = 0x0000DC0C, .val = 0x000001FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counters are appended in result-buffer order; each one takes the
       * next slot of query->counters and records its own byte offset. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from perf rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      /* Per-shader-stage thread dispatch counts (UINT64 events). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__gpu_busyness__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      /* Per-engine busyness percentages (FLOAT, 0..100). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The description names the compute command streamer so that it
       * agrees with this counter's name and symbol_name. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when compute command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__gpu_busyness__any_engine_busy__read;
      counter->name = "Any Engine Busy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* Total result size = end of the last counter appended above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query discoverable by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity1";
   query->symbol_name = "EuActivity1";
   query->guid = "414ff049-80d3-48c0-b79a-bd8eed097a06";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00810710 },
         { .reg = 0x0000E558, .val = 0x00A10910 },
         { .reg = 0x0000E658, .val = 0x00850750 },
         { .reg = 0x0000E758, .val = 0x00A50950 },
         { .reg = 0x0000E45C, .val = 0x00802702 },
         { .reg = 0x0000E55C, .val = 0x00A02902 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity1__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__vs_fpu_active__read;
      counter->name = "VS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__ps_fpu_active__read;
      counter->name = "PS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpuActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity1__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "EuActivity2" OA metric set for tglgt2.
 *
 * Allocates a query description, fills in its OA report format, accumulator
 * offsets, register programming tables (mux / boolean-counter / flex) and
 * 18 counter descriptions, then inserts the query into
 * perf->oa_metrics_table keyed by the metric set GUID string.
 *
 * perf: the perf configuration the query is attached to; also passed as the
 *       first argument of rzalloc() for the query allocation.
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are used without a
 * NULL check -- confirm the allocator's out-of-memory policy.
 */
static void
tglgt2_register_eu_activity2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity2";
   query->symbol_name = "EuActivity2";
   query->guid = "fb65c819-7ac2-4c69-aa9d-b72a18440705";
   /* Room for exactly the 18 counters described below; n_counters is bumped
    * as each slot is filled in. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* With the constants below these evaluate to: gpu_time = 0, gpu_clock = 1,
    * a = 2, b = 38, c = 46, perfcnt = 54, rpstat = 56. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */
   /* NOTE(review): query is allocated afresh at the top of this function, so
    * the "global variable" wording above looks stale. data_size is not
    * assigned before this check; presumably rzalloc() zero-fills, making the
    * condition always true here -- confirm. */

   if (!query->data_size) {
      /* Register/value pairs for the mux configuration. The same register
       * (0x9888) is written many times with different values, so the order
       * of entries is presumably significant -- do not reorder. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs for the boolean ("B") counter configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs for the flex configuration. These values are
       * what distinguish this set from the other EuActivity sets visible in
       * this file, which share the same b_counter_regs contents. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00862762 },
         { .reg = 0x0000E558, .val = 0x00A62962 },
         { .reg = 0x0000E658, .val = 0x00860760 },
         { .reg = 0x0000E758, .val = 0x00A60960 },
         { .reg = 0x0000E45C, .val = 0x00861761 },
         { .reg = 0x0000E55C, .val = 0x00A61961 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions follow. Each one claims the next slot of
       * query->counters, sets the read callback matching its data_type
       * (oa_counter_read_uint64 or oa_counter_read_float) and records the
       * byte offset of its value in the query result data. In this set the
       * UINT64 counters sit at 8-byte-aligned offsets and consecutive FLOAT
       * counters are 4 bytes apart. */

      /* --- GPU timing / frequency / busy (offsets 0..24) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time (from perf) rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* --- Per-stage thread dispatch counts (offsets 32..72) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* --- Engine busy percentages, FLOAT (offsets 80, 84, 88) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      /* NOTE(review): the description below is identical to RenderBusy's and
       * mentions the "render command streamer" although this counter is
       * ComputeBusy -- looks like a copy/paste in the generator's input;
       * confirm and fix at the source rather than in this generated file. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* --- GTI throughput, UINT64 (offsets 96, 104); note the jump from
       * 88 to 96 rather than 92 after the preceding FLOAT --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity2__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      /* --- Compute-shader EU pipe activity, FLOAT (offsets 112..120);
       * these three counters are specific to the EuActivity2 set --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__cs_em_active__read;
      counter->name = "CS EM Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsEmActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__cs_fpu_active__read;
      counter->name = "CS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity2__cs_send_active__read;
      counter->name = "CS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsSendActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* counter still points at the last counter filled in (offset 120), so
       * the total result size is that offset plus that counter's size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Make the query discoverable by GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity3";
   query->symbol_name = "EuActivity3";
   query->guid = "7e1c6469-9de7-491a-a7c5-1bd8f9966826";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00811711 },
         { .reg = 0x0000E558, .val = 0x00A11911 },
         { .reg = 0x0000E658, .val = 0x00851751 },
         { .reg = 0x0000E758, .val = 0x00A51951 },
         { .reg = 0x0000E45C, .val = 0x00852752 },
         { .reg = 0x0000E55C, .val = 0x00A52952 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity3__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__vs_em_active__read;
      counter->name = "VS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsEmActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__ps_em_active__read;
      counter->name = "PS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsEmActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity4";
   query->symbol_name = "EuActivity4";
   query->guid = "a43f80cd-5cc1-4a2c-a750-40594af2b661";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00820720 },
         { .reg = 0x0000E558, .val = 0x00A20920 },
         { .reg = 0x0000E658, .val = 0x00830730 },
         { .reg = 0x0000E758, .val = 0x00A30930 },
         { .reg = 0x0000E45C, .val = 0x00812712 },
         { .reg = 0x0000E55C, .val = 0x00A12912 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity4__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__hs_fpu_active__read;
      counter->name = "HS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsFpuActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__ds_fpu_active__read;
      counter->name = "DS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsFpuActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity5";
   query->symbol_name = "EuActivity5";
   query->guid = "7e6e555c-aa5b-4c8d-992a-454a5a335c6e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00821721 },
         { .reg = 0x0000E558, .val = 0x00A21921 },
         { .reg = 0x0000E658, .val = 0x00831731 },
         { .reg = 0x0000E758, .val = 0x00A31931 },
         { .reg = 0x0000E45C, .val = 0x00822722 },
         { .reg = 0x0000E55C, .val = 0x00A22922 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity5__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__hs_em_active__read;
      counter->name = "HS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a hull shader instructions. Unit: percent.";
      counter->symbol_name = "HsEmActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__ds_em_active__read;
      counter->name = "DS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsEmActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity5__hs_send_active__read;
      counter->name = "HS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsSendActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity6";
   query->symbol_name = "EuActivity6";
   query->guid = "f3723f39-ecf4-4ff2-a4c4-80e87876b86f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00840740 },
         { .reg = 0x0000E558, .val = 0x00A40940 },
         { .reg = 0x0000E658, .val = 0x00841741 },
         { .reg = 0x0000E758, .val = 0x00A41941 },
         { .reg = 0x0000E45C, .val = 0x00842742 },
         { .reg = 0x0000E55C, .val = 0x00A42942 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity6__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__gs_fpu_active__read;
      counter->name = "GS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsFpuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__gs_em_active__read;
      counter->name = "GS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsEmActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity6__gs_send_active__read;
      counter->name = "GS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a geometry shader instruction. Unit: percent.";
      counter->symbol_name = "GsSendActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity7_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity7";
   query->symbol_name = "EuActivity7";
   query->guid = "c0d2cd0a-e2be-4b12-916d-2f3aba0ebf9e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00803703 },
         { .reg = 0x0000E558, .val = 0x00A03903 },
         { .reg = 0x0000E658, .val = 0x00800700 },
         { .reg = 0x0000E758, .val = 0x00A00900 },
         { .reg = 0x0000E45C, .val = 0x00801701 },
         { .reg = 0x0000E55C, .val = 0x00A01901 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity7__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__fpu_active__read;
      counter->name = "EU FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "FpuActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__em_active__read;
      counter->name = "EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__eu_fpu_em_active__read;
      counter->name = "EU FPU And EM Pipes Active";
      counter->desc = "The percentage of time in which EU FPU and EM pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuEmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity7__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity7__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
tglgt2_register_eu_activity8_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity8";
   query->symbol_name = "EuActivity8";
   query->guid = "1fbbd218-693c-4035-b4c0-ce4dd139d828";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 16);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47103000 },
         { .reg = 0x00009888, .val = 0x49103535 },
         { .reg = 0x00009888, .val = 0x4B103535 },
         { .reg = 0x00009888, .val = 0x4D100535 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00832732 },
         { .reg = 0x0000E558, .val = 0x00A32932 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__eu_activity8__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity8__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity8__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity8__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity8__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__eu_activity8__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = tglgt2__eu_activity8__ds_send_active__read;
      counter->name = "DS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a domain shader instruction. Unit: percent.";
      counter->symbol_name = "DsSendActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describes the "TestOa" OA metric set for tglgt2 and registers it with perf.
 *
 * A zero-initialized intel_perf_query_info is allocated with `perf` as its
 * ralloc context, filled in with:
 *   - identification (name, symbol name, GUID, OA report format),
 *   - accumulator slot offsets,
 *   - the mux / B-counter / flex register programming tables,
 *   - 13 counter descriptors (3 generic GPU counters + TestCounter0..9),
 * and finally inserted into perf->oa_metrics_table keyed by the GUID string.
 *
 * Allocation results are not checked here.
 */
static void
tglgt2_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TestOa";
   query->symbol_name = "TestOa";
   query->guid = "6607f034-d053-40d1-8215-67c07f3041bb";
   /* Sized for exactly the 13 counters appended below; the array is a ralloc
    * child of the query itself. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 13);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is chained from the previous one, giving:
    *   gpu_time = 0, gpu_clock = 1, a = 2, b = 38, c = 46,
    *   perfcnt = 54, rpstat = 56.
    * The read callbacks use these as indices into results->accumulator[].
    * NOTE(review): the 36/8/8 spans appear to mirror the A32+A4 / B8 / C8
    * groups named in the OA format above -- confirm against the accumulator
    * code. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* This initial value is never read: `counter` is reassigned before every
    * use inside the block below. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): `query` was zero-allocated just above, so data_size is 0
    * on entry and this branch is always taken; the guard (and the comment
    * about a global variable) looks like a leftover of the generator
    * template. */
   if (!query->data_size) {
      /* Register/value pairs exposed through query->config.mux_regs.  Several
       * entries target the same register (0x9888) with different values, so
       * the table order must be kept exactly as generated.
       * NOTE(review): presumably applied in array order by whoever consumes
       * query->config -- confirm. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x280E0000 },
         { .reg = 0x00009888, .val = 0x1E0E0147 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020100 },
         { .reg = 0x00009888, .val = 0x2C030004 },
         { .reg = 0x00009888, .val = 0x38003000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x49110000 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x1D140020 },
         { .reg = 0x00009888, .val = 0x1D1103A3 },
         { .reg = 0x00009888, .val = 0x01110000 },
         { .reg = 0x00009888, .val = 0x61111000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100630 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs exposed through query->config.b_counter_regs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000004 },
         { .reg = 0x0000D944, .val = 0x0000FFFF },
         { .reg = 0x0000DC00, .val = 0x00000004 },
         { .reg = 0x0000DC04, .val = 0x0000FFFF },
         { .reg = 0x0000D948, .val = 0x00000003 },
         { .reg = 0x0000D94C, .val = 0x0000FFFF },
         { .reg = 0x0000DC08, .val = 0x00000003 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFF },
         { .reg = 0x0000D950, .val = 0x00000007 },
         { .reg = 0x0000D954, .val = 0x0000FFFF },
         { .reg = 0x0000DC10, .val = 0x00000007 },
         { .reg = 0x0000DC14, .val = 0x0000FFFF },
         { .reg = 0x0000D958, .val = 0x00100002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00100002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00100002 },
         { .reg = 0x0000D964, .val = 0x0000FFCF },
         { .reg = 0x0000DC20, .val = 0x00100002 },
         { .reg = 0x0000DC24, .val = 0x0000FFCF },
         { .reg = 0x0000D968, .val = 0x00100082 },
         { .reg = 0x0000D96C, .val = 0x0000FFEF },
         { .reg = 0x0000DC28, .val = 0x00100082 },
         { .reg = 0x0000DC2C, .val = 0x0000FFEF },
         { .reg = 0x0000D970, .val = 0x001000C2 },
         { .reg = 0x0000D974, .val = 0x0000FFE7 },
         { .reg = 0x0000DC30, .val = 0x001000C2 },
         { .reg = 0x0000DC34, .val = 0x0000FFE7 },
         { .reg = 0x0000D978, .val = 0x00100001 },
         { .reg = 0x0000D97C, .val = 0x0000FFE7 },
         { .reg = 0x0000DC38, .val = 0x00100001 },
         { .reg = 0x0000DC3C, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Single register/value pair exposed through query->config.flex_regs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors.  Each one claims the next free slot of
       * query->counters via the n_counters post-increment, so the statement
       * order defines the counter index.  All 13 counters here are UINT64 and
       * their `offset` values advance in steps of 8 (0, 8, ..., 96).
       * NOTE(review): `offset` is presumably the counter's byte position in
       * the query result data, given that data_size below is derived from
       * it -- confirm against the consumer. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time rather than left at 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = tglgt2__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* TestCounter0..TestCounter9: hardware test counters, all registered
       * as UINT64 event counters in the "GPU" category. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = tglgt2__test_oa__counter9__read;
      counter->name = "TestCounter9 - OAR enable";
      counter->desc = "HW test counter 9. Should be equal to 1 in query. Unit: events.";
      counter->symbol_name = "Counter9";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      /* `counter` still points at the last descriptor added (offset 96), so
       * the total is that offset plus the size reported for that counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the metric set, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_tglgt2(struct intel_perf_config *perf)
{
   tglgt2_register_render_basic_counter_query(perf);
   tglgt2_register_compute_basic_counter_query(perf);
   tglgt2_register_render_pipe_profile_counter_query(perf);
   tglgt2_register_hdc_and_sf_counter_query(perf);
   tglgt2_register_rasterizer_and_pixel_backend_counter_query(perf);
   tglgt2_register_l3_1_counter_query(perf);
   tglgt2_register_l3_2_counter_query(perf);
   tglgt2_register_l3_3_counter_query(perf);
   tglgt2_register_l3_4_counter_query(perf);
   tglgt2_register_l3_5_counter_query(perf);
   tglgt2_register_l3_6_counter_query(perf);
   tglgt2_register_sampler_1_counter_query(perf);
   tglgt2_register_sampler_2_counter_query(perf);
   tglgt2_register_tdl_1_counter_query(perf);
   tglgt2_register_tdl_2_counter_query(perf);
   tglgt2_register_tdl_3_counter_query(perf);
   tglgt2_register_gpu_busyness_counter_query(perf);
   tglgt2_register_eu_activity1_counter_query(perf);
   tglgt2_register_eu_activity2_counter_query(perf);
   tglgt2_register_eu_activity3_counter_query(perf);
   tglgt2_register_eu_activity4_counter_query(perf);
   tglgt2_register_eu_activity5_counter_query(perf);
   tglgt2_register_eu_activity6_counter_query(perf);
   tglgt2_register_eu_activity7_counter_query(perf);
   tglgt2_register_eu_activity8_counter_query(perf);
   tglgt2_register_test_oa_counter_query(perf);
}


static void
rkl_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "5b492c36-73f7-4827-83b3-c6863697ec51";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 34);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14150001 },
         { .reg = 0x00009888, .val = 0x16150020 },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x10124000 },
         { .reg = 0x00009888, .val = 0x12124000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C130E00 },
         { .reg = 0x00009888, .val = 0x00150050 },
         { .reg = 0x00009888, .val = 0x06157000 },
         { .reg = 0x00009888, .val = 0x08157151 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x18150000 },
         { .reg = 0x00009888, .val = 0x1C150000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B4000 },
         { .reg = 0x00009888, .val = 0x100C8000 },
         { .reg = 0x00009888, .val = 0x1E0C000E },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x0A0D8000 },
         { .reg = 0x00009888, .val = 0x0C0DC000 },
         { .reg = 0x00009888, .val = 0x000E8000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x100E8000 },
         { .reg = 0x00009888, .val = 0x120E8000 },
         { .reg = 0x00009888, .val = 0x000F4000 },
         { .reg = 0x00009888, .val = 0x060F8000 },
         { .reg = 0x00009888, .val = 0x080FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D47 },
         { .reg = 0x00009888, .val = 0x09151536 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B1050BB },
         { .reg = 0x00009888, .val = 0x5D102C01 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x61110001 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x1F150137 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x0F168000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x1D350137 },
         { .reg = 0x00009888, .val = 0x03350147 },
         { .reg = 0x00009888, .val = 0x07350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x0F364000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101810 },
         { .reg = 0x00009888, .val = 0x57100008 },
         { .reg = 0x00009888, .val = 0x49101818 },
         { .reg = 0x00009888, .val = 0x4B100818 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler Slice0 Dualsubslice0 is bottleneck";
         counter->desc = "The percentage of time when sampler slice0 dualsubslice0 is bottleneck Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "09740672-ccc3-48da-a5bc-64994fe3a0a7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "833bb0f2-9483-498e-b3bf-ebf026a6301a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x00123E00 },
         { .reg = 0x00009888, .val = 0x060B00B3 },
         { .reg = 0x00009888, .val = 0x140B7C00 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x120C0320 },
         { .reg = 0x00009888, .val = 0x040D3E00 },
         { .reg = 0x00009888, .val = 0x000D0000 },
         { .reg = 0x00009888, .val = 0x280D0000 },
         { .reg = 0x00009888, .val = 0x2A0E03E0 },
         { .reg = 0x00009888, .val = 0x10087C00 },
         { .reg = 0x00009888, .val = 0x1E120002 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x1E130002 },
         { .reg = 0x00009888, .val = 0x0E0B0031 },
         { .reg = 0x00009888, .val = 0x180B0092 },
         { .reg = 0x00009888, .val = 0x1A0B00B1 },
         { .reg = 0x00009888, .val = 0x020B0093 },
         { .reg = 0x00009888, .val = 0x040B0033 },
         { .reg = 0x00009888, .val = 0x200B0000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B8000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009888, .val = 0x0A0C0022 },
         { .reg = 0x00009888, .val = 0x1E0C0325 },
         { .reg = 0x00009888, .val = 0x180C8000 },
         { .reg = 0x00009888, .val = 0x1A0C8000 },
         { .reg = 0x00009888, .val = 0x100D0017 },
         { .reg = 0x00009888, .val = 0x160D0013 },
         { .reg = 0x00009888, .val = 0x1C0D0081 },
         { .reg = 0x00009888, .val = 0x080D0082 },
         { .reg = 0x00009888, .val = 0x0A0D4102 },
         { .reg = 0x00009888, .val = 0x140D0000 },
         { .reg = 0x00009888, .val = 0x0C0D0000 },
         { .reg = 0x00009888, .val = 0x0E0D0000 },
         { .reg = 0x00009888, .val = 0x120D8000 },
         { .reg = 0x00009888, .val = 0x0C0E0215 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x1E0E8000 },
         { .reg = 0x00009888, .val = 0x0E0F8000 },
         { .reg = 0x00009888, .val = 0x060F4000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009888, .val = 0x08081000 },
         { .reg = 0x00009888, .val = 0x16080000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111555 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x4D101408 },
         { .reg = 0x00009888, .val = 0x4F100801 },
         { .reg = 0x00009888, .val = 0x51100A08 },
         { .reg = 0x00009888, .val = 0x53101118 },
         { .reg = 0x00009888, .val = 0x55100801 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101111 },
         { .reg = 0x00009888, .val = 0x4B100801 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "40ef13ef-25d9-4ac6-8582-ce97c47fc900";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 31);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14112400 },
         { .reg = 0x00009888, .val = 0x14312400 },
         { .reg = 0x00009888, .val = 0x240A0019 },
         { .reg = 0x00009888, .val = 0x00110074 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00128000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x0E310074 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x06310000 },
         { .reg = 0x00009888, .val = 0x0E328000 },
         { .reg = 0x00009888, .val = 0x1C330200 },
         { .reg = 0x00009888, .val = 0x1C058000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B8000 },
         { .reg = 0x00009888, .val = 0x100B8000 },
         { .reg = 0x00009888, .val = 0x100C8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x000E8000 },
         { .reg = 0x00009888, .val = 0x000F4000 },
         { .reg = 0x00009888, .val = 0x100A00F7 },
         { .reg = 0x00009888, .val = 0x140A0000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A0000 },
         { .reg = 0x00009888, .val = 0x0E084000 },
         { .reg = 0x00009888, .val = 0x14092000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100110 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x70800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00070000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__hdc_and_sf__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "bf60f7bd-9029-4edf-8311-f6e86599395e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 37);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1E055000 },
         { .reg = 0x00009888, .val = 0x1A0500C0 },
         { .reg = 0x00009888, .val = 0x2A0A7300 },
         { .reg = 0x00009888, .val = 0x2C0A0000 },
         { .reg = 0x00009888, .val = 0x120800A0 },
         { .reg = 0x00009888, .val = 0x04052700 },
         { .reg = 0x00009888, .val = 0x060500C0 },
         { .reg = 0x00009888, .val = 0x22050000 },
         { .reg = 0x00009888, .val = 0x1C050000 },
         { .reg = 0x00009888, .val = 0x000B8000 },
         { .reg = 0x00009888, .val = 0x0E0B8000 },
         { .reg = 0x00009888, .val = 0x100B8000 },
         { .reg = 0x00009888, .val = 0x120B8000 },
         { .reg = 0x00009888, .val = 0x140B8000 },
         { .reg = 0x00009888, .val = 0x080B8000 },
         { .reg = 0x00009888, .val = 0x0A0B8000 },
         { .reg = 0x00009888, .val = 0x0C0B8000 },
         { .reg = 0x00009888, .val = 0x000A0144 },
         { .reg = 0x00009888, .val = 0x0E0A4145 },
         { .reg = 0x00009888, .val = 0x100A0156 },
         { .reg = 0x00009888, .val = 0x080A814F },
         { .reg = 0x00009888, .val = 0x140A0000 },
         { .reg = 0x00009888, .val = 0x040A0000 },
         { .reg = 0x00009888, .val = 0x0A0A4000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009888, .val = 0x08081980 },
         { .reg = 0x00009888, .val = 0x0A084032 },
         { .reg = 0x00009888, .val = 0x10080000 },
         { .reg = 0x00009888, .val = 0x16080000 },
         { .reg = 0x00009888, .val = 0x0C084000 },
         { .reg = 0x00009888, .val = 0x14091800 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x31152800 },
         { .reg = 0x00009888, .val = 0x331500A0 },
         { .reg = 0x00009888, .val = 0x31352800 },
         { .reg = 0x00009888, .val = 0x333500A0 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B10556B },
         { .reg = 0x00009888, .val = 0x5D103005 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115540 },
         { .reg = 0x00009888, .val = 0x61110005 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x05150096 },
         { .reg = 0x00009888, .val = 0x07150016 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x0316C000 },
         { .reg = 0x00009888, .val = 0x1F350096 },
         { .reg = 0x00009888, .val = 0x03350016 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x0F368000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x47101600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100606 },
         { .reg = 0x00009888, .val = 0x51100404 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101000 },
         { .reg = 0x00009888, .val = 0x57100002 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B100602 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000038 },
         { .reg = 0x0000D944, .val = 0x0000FFF8 },
         { .reg = 0x0000DC00, .val = 0x00000038 },
         { .reg = 0x0000DC04, .val = 0x0000FFF8 },
         { .reg = 0x0000D948, .val = 0x000000C0 },
         { .reg = 0x0000D94C, .val = 0x0000FFE7 },
         { .reg = 0x0000DC08, .val = 0x000000C0 },
         { .reg = 0x0000DC0C, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__pixel_data00_ready__read;
      counter->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData00Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__ps_output00_available__read;
      counter->name = "Slice0 Pipe0 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput00Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__pixel_values00_ready__read;
      counter->name = "Slice0 Pipe0 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues00Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__gt_request_queue00_full__read;
      counter->name = "SQ00 is full";
      counter->desc = "The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue00Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__gt_request_queue01_full__read;
      counter->name = "SQ01 is full";
      counter->desc = "The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue01Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__gt_request_queue10_full__read;
      counter->name = "SQ10 is full";
      counter->desc = "The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue10Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__rasterizer_and_pixel_backend__gt_request_queue11_full__read;
      counter->name = "SQ11 is full";
      counter->desc = "The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue11Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_1";
   query->symbol_name = "L3_1";
   query->guid = "a5f6ac3d-853f-42a4-a038-c4a09e8075be";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

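   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe the register
    * programming in static const tables that are shared by every context.
    *
    * The query object itself is not a global: it is allocated with
    * rzalloc() above, so data_size is expected to still be zero when the
    * check below is reached. */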

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04002024 },
         { .reg = 0x00009888, .val = 0x0600282F },
         { .reg = 0x00009888, .val = 0x1C0B8000 },
         { .reg = 0x00009888, .val = 0x1E0B8000 },
         { .reg = 0x00009888, .val = 0x020B8000 },
         { .reg = 0x00009888, .val = 0x040B8000 },
         { .reg = 0x00009888, .val = 0x060B8000 },
         { .reg = 0x00009888, .val = 0x080B8000 },
         { .reg = 0x00009888, .val = 0x0A0B8000 },
         { .reg = 0x00009888, .val = 0x0C0B8000 },
         { .reg = 0x00009888, .val = 0x120AC000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060AC000 },
         { .reg = 0x00009888, .val = 0x080AC000 },
         { .reg = 0x00009888, .val = 0x0A0A4000 },
         { .reg = 0x00009888, .val = 0x1608A000 },
         { .reg = 0x00009888, .val = 0x02088000 },
         { .reg = 0x00009888, .val = 0x04088000 },
         { .reg = 0x00009888, .val = 0x06088000 },
         { .reg = 0x00009888, .val = 0x08088000 },
         { .reg = 0x00009888, .val = 0x0A088000 },
         { .reg = 0x00009888, .val = 0x0C088000 },
         { .reg = 0x00009888, .val = 0x1C000027 },
         { .reg = 0x00009888, .val = 0x1E000026 },
         { .reg = 0x00009888, .val = 0x02000025 },
         { .reg = 0x00009888, .val = 0x0800002E },
         { .reg = 0x00009888, .val = 0x0A00002D },
         { .reg = 0x00009888, .val = 0x0C00002C },
         { .reg = 0x00009888, .val = 0x2E000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100555 },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x5F110555 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_1__l30_bank0_input_available__read;
         counter->name = "Slice0 L3 Bank0 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank0 has input available Unit: percent.";
         counter->symbol_name = "L30Bank0InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_1__l30_bank1_input_available__read;
         counter->name = "Slice0 L3 Bank1 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank1 has input available Unit: percent.";
         counter->symbol_name = "L30Bank1InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_2";
   query->symbol_name = "L3_2";
   query->guid = "bb080384-a01f-442f-8be2-3eae2c7394cd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04003000 },
         { .reg = 0x00009888, .val = 0x06003800 },
         { .reg = 0x00009888, .val = 0x000B8000 },
         { .reg = 0x00009888, .val = 0x0E0B8000 },
         { .reg = 0x00009888, .val = 0x100B8000 },
         { .reg = 0x00009888, .val = 0x120B8000 },
         { .reg = 0x00009888, .val = 0x140B8000 },
         { .reg = 0x00009888, .val = 0x160B8000 },
         { .reg = 0x00009888, .val = 0x180B8000 },
         { .reg = 0x00009888, .val = 0x1A0B8000 },
         { .reg = 0x00009888, .val = 0x040A4000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0AC000 },
         { .reg = 0x00009888, .val = 0x0E0AC000 },
         { .reg = 0x00009888, .val = 0x100AC000 },
         { .reg = 0x00009888, .val = 0x00088000 },
         { .reg = 0x00009888, .val = 0x0E088000 },
         { .reg = 0x00009888, .val = 0x16080AAA },
         { .reg = 0x00009888, .val = 0x00000024 },
         { .reg = 0x00009888, .val = 0x0E000025 },
         { .reg = 0x00009888, .val = 0x10000026 },
         { .reg = 0x00009888, .val = 0x12000027 },
         { .reg = 0x00009888, .val = 0x1400002C },
         { .reg = 0x00009888, .val = 0x1600002D },
         { .reg = 0x00009888, .val = 0x1800002E },
         { .reg = 0x00009888, .val = 0x1A00002F },
         { .reg = 0x00009888, .val = 0x2E000000 },
         { .reg = 0x00009888, .val = 0x2C000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x5D100155 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115000 },
         { .reg = 0x00009888, .val = 0x61110155 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x19128000 },
         { .reg = 0x00009888, .val = 0x1B128000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_2__l30_bank2_input_available__read;
         counter->name = "Slice0 L3 Bank2 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank2 has input available Unit: percent.";
         counter->symbol_name = "L30Bank2InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_2__l30_bank3_input_available__read;
         counter->name = "Slice0 L3 Bank3 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank3 has input available Unit: percent.";
         counter->symbol_name = "L30Bank3InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_3";
   query->symbol_name = "L3_3";
   query->guid = "9f39ce80-0463-484e-b797-3135b7e0ab0a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04000200 },
         { .reg = 0x00009888, .val = 0x06000000 },
         { .reg = 0x00009888, .val = 0x0A0B8000 },
         { .reg = 0x00009888, .val = 0x0C0B8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A4000 },
         { .reg = 0x00009888, .val = 0x0A088000 },
         { .reg = 0x00009888, .val = 0x0C088000 },
         { .reg = 0x00009888, .val = 0x0A000020 },
         { .reg = 0x00009888, .val = 0x0C000028 },
         { .reg = 0x00009888, .val = 0x2E000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100500 },
         { .reg = 0x00009888, .val = 0x5F110500 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_3__l30_bank0_output_ready__read;
         counter->name = "Slice0 L3 Bank0 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank0 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank0OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "L3_4" OA metric set for RKL.
 *
 * Allocates a query descriptor parented to `perf`, attaches the mux,
 * B-counter and flex register tables, describes up to 14 counters and
 * inserts the descriptor into perf->oa_metrics_table keyed by its GUID.
 */
static void
rkl_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs referenced from query->config below. The tables
    * are static const, so the query only stores pointers to them. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04000A00 },
      { .reg = 0x00009888, .val = 0x06000820 },
      { .reg = 0x00009888, .val = 0x060B8000 },
      { .reg = 0x00009888, .val = 0x080B8000 },
      { .reg = 0x00009888, .val = 0x060A8000 },
      { .reg = 0x00009888, .val = 0x080A4000 },
      { .reg = 0x00009888, .val = 0x06088000 },
      { .reg = 0x00009888, .val = 0x08088000 },
      { .reg = 0x00009888, .val = 0x08000028 },
      { .reg = 0x00009888, .val = 0x2E000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x5B100050 },
      { .reg = 0x00009888, .val = 0x5F110050 },
      { .reg = 0x00009888, .val = 0x07128000 },
      { .reg = 0x00009888, .val = 0x09128000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query =
      rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_4";
   query->symbol_name = "L3_4";
   query->guid = "11e091e1-bbd3-4bd6-9ce8-8982d2d3a3b2";
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Storage for 13 unconditional counters plus one slice-dependent one. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);

   /* Accumulation buffer layout: every section begins where the previous
    * one ends (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset  = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset         = query->gpu_clock_offset + 1;
   query->b_offset         = query->a_offset + 36;
   query->c_offset         = query->b_offset + 8;
   query->perfcnt_offset   = query->c_offset + 8;
   query->rpstat_offset    = query->perfcnt_offset + 2;

   /* Cursor over query->counters; each registration below appends one entry. */
   struct intel_perf_query_counter *ctr = query->counters;

   /* Only describe the query while its data size is still unset
    * (presumably always the case for a freshly rzalloc'ed descriptor). */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* --- GPU-wide counters --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = rkl__l3_4__gpu_time__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = rkl__l3_4__gpu_core_clocks__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = rkl__l3_4__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = rkl__l3_4__avg_gpu_core_frequency__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = rkl__l3_4__gpu_busy__read;

      /* --- Per-stage thread dispatch counters --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = rkl__l3_4__vs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = rkl__l3_4__hs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = rkl__l3_4__ds_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = rkl__l3_4__gs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = rkl__l3_4__ps_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = rkl__l3_4__cs_threads__read;

      /* --- EU array percentages (float counters, 4-byte stride) --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = rkl__l3_4__eu_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = rkl__l3_4__eu_stall__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = rkl__l3_4__eu_thread_occupancy__read;

      /* --- L3 counter, registered only when bit 0 of slice_mask is set --- */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 L3 Bank1 Output Ready";
         ctr->symbol_name = "L30Bank1OutputReady";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 output is ready Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = rkl__l3_4__l30_bank1_output_ready__read;
      }

      /* ctr is the last counter appended, so the data size is its offset
       * plus its own size. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the "L3_5" OA metric set for RKL.
 *
 * Allocates a query descriptor parented to `perf`, attaches the mux,
 * B-counter and flex register tables, describes up to 14 counters and
 * inserts the descriptor into perf->oa_metrics_table keyed by its GUID.
 */
static void
rkl_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs referenced from query->config below. The tables
    * are static const, so the query only stores pointers to them. */
   static const struct intel_perf_query_register_prog mux_regs[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04001200 },
      { .reg = 0x00009888, .val = 0x06001020 },
      { .reg = 0x00009888, .val = 0x060B8000 },
      { .reg = 0x00009888, .val = 0x080B8000 },
      { .reg = 0x00009888, .val = 0x060A8000 },
      { .reg = 0x00009888, .val = 0x080A4000 },
      { .reg = 0x00009888, .val = 0x06088000 },
      { .reg = 0x00009888, .val = 0x08088000 },
      { .reg = 0x00009888, .val = 0x08000028 },
      { .reg = 0x00009888, .val = 0x2E000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x5B100050 },
      { .reg = 0x00009888, .val = 0x5F110050 },
      { .reg = 0x00009888, .val = 0x07128000 },
      { .reg = 0x00009888, .val = 0x09128000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_regs[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_regs[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query =
      rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_5";
   query->symbol_name = "L3_5";
   query->guid = "0f2e1818-c190-494a-a828-2ed4422d71f3";
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Storage for 13 unconditional counters plus one slice-dependent one. */
   query->n_counters = 0;
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);

   /* Accumulation buffer layout: every section begins where the previous
    * one ends (1 + 1 + 36 + 8 + 8 + 2 slots). */
   query->gpu_time_offset  = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset         = query->gpu_clock_offset + 1;
   query->b_offset         = query->a_offset + 36;
   query->c_offset         = query->b_offset + 8;
   query->perfcnt_offset   = query->c_offset + 8;
   query->rpstat_offset    = query->perfcnt_offset + 2;

   /* Cursor over query->counters; each registration below appends one entry. */
   struct intel_perf_query_counter *ctr = query->counters;

   /* Only describe the query while its data size is still unset
    * (presumably always the case for a freshly rzalloc'ed descriptor). */
   if (query->data_size == 0) {
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* --- GPU-wide counters --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = rkl__l3_5__gpu_time__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = rkl__l3_5__gpu_core_clocks__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = rkl__l3_5__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = rkl__l3_5__avg_gpu_core_frequency__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = rkl__l3_5__gpu_busy__read;

      /* --- Per-stage thread dispatch counters --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = rkl__l3_5__vs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = rkl__l3_5__hs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = rkl__l3_5__ds_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = rkl__l3_5__gs_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = rkl__l3_5__ps_threads__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = rkl__l3_5__cs_threads__read;

      /* --- EU array percentages (float counters, 4-byte stride) --- */
      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = rkl__l3_5__eu_active__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = rkl__l3_5__eu_stall__read;

      ctr = &query->counters[query->n_counters++];
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = rkl__l3_5__eu_thread_occupancy__read;

      /* --- L3 counter, registered only when bit 0 of slice_mask is set --- */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = &query->counters[query->n_counters++];
         ctr->name = "Slice0 L3 Bank2 Output Ready";
         ctr->symbol_name = "L30Bank2OutputReady";
         ctr->desc = "The percentage of time in which slice0 L3 bank2 output is ready Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = rkl__l3_5__l30_bank2_output_ready__read;
      }

      /* ctr is the last counter appended, so the data size is its offset
       * plus its own size. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_l3_6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_6";
   query->symbol_name = "L3_6";
   query->guid = "ebd91ba3-4d01-4bdf-8752-384cd0a4e139";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 14);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04001A00 },
         { .reg = 0x00009888, .val = 0x06001820 },
         { .reg = 0x00009888, .val = 0x060B8000 },
         { .reg = 0x00009888, .val = 0x080B8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A4000 },
         { .reg = 0x00009888, .val = 0x06088000 },
         { .reg = 0x00009888, .val = 0x08088000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x2E000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100050 },
         { .reg = 0x00009888, .val = 0x5F110050 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__l3_6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__l3_6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_6__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_6__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__l3_6__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__l3_6__l30_bank3_output_ready__read;
         counter->name = "Slice0 L3 Bank3 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank3 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank3OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the "Sampler_1" OA metric set description and publish it in
 * perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The query info is allocated with rzalloc() using `perf` as the ralloc
 * context, and its counter array (17 slots) is allocated with the query
 * as context.  Thirteen counters are always registered; up to four more
 * sampler counters are added depending on bits 0 and 1 of
 * perf->sys_vars.subslice_mask.
 */
static void
rkl_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   /* NOA mux programming for this metric set. */
   static const struct intel_perf_query_register_prog sampler_1_mux[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x0C123E00 },
      { .reg = 0x00009888, .val = 0x1C121600 },
      { .reg = 0x00009888, .val = 0x04143E00 },
      { .reg = 0x00009888, .val = 0x18141613 },
      { .reg = 0x00009888, .val = 0x0C323E00 },
      { .reg = 0x00009888, .val = 0x1C321600 },
      { .reg = 0x00009888, .val = 0x04343E00 },
      { .reg = 0x00009888, .val = 0x18341600 },
      { .reg = 0x00009888, .val = 0x0E120086 },
      { .reg = 0x00009888, .val = 0x1A120033 },
      { .reg = 0x00009888, .val = 0x20120000 },
      { .reg = 0x00009888, .val = 0x00124000 },
      { .reg = 0x00009888, .val = 0x18124000 },
      { .reg = 0x00009888, .val = 0x10138000 },
      { .reg = 0x00009888, .val = 0x1C13C200 },
      { .reg = 0x00009888, .val = 0x001400A6 },
      { .reg = 0x00009888, .val = 0x10140000 },
      { .reg = 0x00009888, .val = 0x0C140000 },
      { .reg = 0x00009888, .val = 0x18158000 },
      { .reg = 0x00009888, .val = 0x1C150800 },
      { .reg = 0x00009888, .val = 0x12320086 },
      { .reg = 0x00009888, .val = 0x16320033 },
      { .reg = 0x00009888, .val = 0x20320000 },
      { .reg = 0x00009888, .val = 0x10324000 },
      { .reg = 0x00009888, .val = 0x14324000 },
      { .reg = 0x00009888, .val = 0x1C333C00 },
      { .reg = 0x00009888, .val = 0x103400A6 },
      { .reg = 0x00009888, .val = 0x14340013 },
      { .reg = 0x00009888, .val = 0x08340000 },
      { .reg = 0x00009888, .val = 0x0A340000 },
      { .reg = 0x00009888, .val = 0x1C350088 },
      { .reg = 0x00009888, .val = 0x220500F0 },
      { .reg = 0x00009888, .val = 0x000B4000 },
      { .reg = 0x00009888, .val = 0x0E0B4000 },
      { .reg = 0x00009888, .val = 0x100B8000 },
      { .reg = 0x00009888, .val = 0x120B8000 },
      { .reg = 0x00009888, .val = 0x140B8000 },
      { .reg = 0x00009888, .val = 0x160B8000 },
      { .reg = 0x00009888, .val = 0x180B4000 },
      { .reg = 0x00009888, .val = 0x1A0B4000 },
      { .reg = 0x00009888, .val = 0x100C8000 },
      { .reg = 0x00009888, .val = 0x1E0C00C2 },
      { .reg = 0x00009888, .val = 0x040D4000 },
      { .reg = 0x00009888, .val = 0x0A0D8000 },
      { .reg = 0x00009888, .val = 0x100DC000 },
      { .reg = 0x00009888, .val = 0x000E8000 },
      { .reg = 0x00009888, .val = 0x0E0E8000 },
      { .reg = 0x00009888, .val = 0x180E8000 },
      { .reg = 0x00009888, .val = 0x1A0E8000 },
      { .reg = 0x00009888, .val = 0x000F4000 },
      { .reg = 0x00009888, .val = 0x060F8000 },
      { .reg = 0x00009888, .val = 0x0C0FC000 },
      { .reg = 0x00009888, .val = 0x0C0AC000 },
      { .reg = 0x00009888, .val = 0x0E0AC000 },
      { .reg = 0x00009888, .val = 0x16080055 },
      { .reg = 0x00009888, .val = 0x1409C000 },
      { .reg = 0x00009888, .val = 0x16090003 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B105000 },
      { .reg = 0x00009888, .val = 0x5D100155 },
      { .reg = 0x00009888, .val = 0x4B112000 },
      { .reg = 0x00009888, .val = 0x5F115000 },
      { .reg = 0x00009888, .val = 0x61110155 },
      { .reg = 0x00009888, .val = 0x01128000 },
      { .reg = 0x00009888, .val = 0x0F128000 },
      { .reg = 0x00009888, .val = 0x11128000 },
      { .reg = 0x00009888, .val = 0x13128000 },
      { .reg = 0x00009888, .val = 0x15128000 },
      { .reg = 0x00009888, .val = 0x17128000 },
      { .reg = 0x00009888, .val = 0x19128000 },
      { .reg = 0x00009888, .val = 0x1B128000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x4F100004 },
      { .reg = 0x00009888, .val = 0x51100004 },
      { .reg = 0x00009888, .val = 0x53100004 },
      { .reg = 0x00009888, .val = 0x55100004 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   /* Boolean (B) counter programming for this metric set. */
   static const struct intel_perf_query_register_prog sampler_1_b_counters[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0xF0800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x000F0000 },
      { .reg = 0x0000D940, .val = 0x00000018 },
      { .reg = 0x0000D944, .val = 0x0000FFFC },
      { .reg = 0x0000DC00, .val = 0x00000018 },
      { .reg = 0x0000DC04, .val = 0x0000FFFC },
      { .reg = 0x0000D948, .val = 0x00000060 },
      { .reg = 0x0000D94C, .val = 0x0000FFF3 },
      { .reg = 0x0000DC08, .val = 0x00000060 },
      { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
      { .reg = 0x0000D950, .val = 0x00000180 },
      { .reg = 0x0000D954, .val = 0x0000FFCF },
      { .reg = 0x0000DC10, .val = 0x00000180 },
      { .reg = 0x0000DC14, .val = 0x0000FFCF },
      { .reg = 0x0000D958, .val = 0x00000600 },
      { .reg = 0x0000D95C, .val = 0x0000FF3F },
      { .reg = 0x0000DC18, .val = 0x00000600 },
      { .reg = 0x0000DC1C, .val = 0x0000FF3F },
   };

   /* Flex EU counter programming for this metric set. */
   static const struct intel_perf_query_register_prog sampler_1_flex[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Sampler_1";
   info->symbol_name = "Sampler_1";
   info->guid = "5102fa66-b5f3-45f2-a16d-0a13a4cbb6c9";

   /* Room for every counter this set can expose (13 fixed + 4 optional). */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 17);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: each section starts right after the
    * previous one. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is only filled in while data_size is still unset.
    * NOTE(review): the query was just allocated with rzalloc(), which
    * presumably zero-fills, so this guard is expected to pass on every
    * call -- confirm against util/ralloc.h. */
   if (info->data_size == 0) {
      info->config.mux_regs = sampler_1_mux;
      info->config.n_mux_regs = ARRAY_SIZE(sampler_1_mux);

      info->config.b_counter_regs = sampler_1_b_counters;
      info->config.n_b_counter_regs = ARRAY_SIZE(sampler_1_b_counters);

      info->config.flex_regs = sampler_1_flex;
      info->config.n_flex_regs = ARRAY_SIZE(sampler_1_flex);

      /* --- GPU-wide counters --- */

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__gpu_time__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__gpu_core_clocks__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      /* The only counter here whose maximum is computed from perf. */
      ctr->raw_max = rkl__sampler_1__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__avg_gpu_core_frequency__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GpuBusy";
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = rkl__sampler_1__gpu_busy__read;

      /* --- Per-stage thread dispatch counters --- */

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "VsThreads";
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__vs_threads__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "HsThreads";
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__hs_threads__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "DsThreads";
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__ds_threads__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "GsThreads";
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__gs_threads__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "PsThreads";
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__ps_threads__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "CsThreads";
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = rkl__sampler_1__cs_threads__read;

      /* --- EU array utilisation (float percentages, 4 bytes apart) --- */

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "EuActive";
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 80;
      ctr->oa_counter_read_float = rkl__sampler_1__eu_active__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "EuStall";
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 84;
      ctr->oa_counter_read_float = rkl__sampler_1__eu_stall__read;

      ctr = info->counters + info->n_counters++;
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->raw_max = 100.0;
      ctr->offset = 88;
      ctr->oa_counter_read_float = rkl__sampler_1__eu_thread_occupancy__read;

      /* --- Sampler counters, gated on subslice_mask bits 0 and 1 ---
       *
       * Registration order (input 0, input 1, output 0, output 1) is kept
       * as-is: it fixes each counter's index and decides which counter is
       * last, which in turn determines data_size below. */
      const bool mask_bit0_set = (perf->sys_vars.subslice_mask & 1) != 0;
      const bool mask_bit1_set = (perf->sys_vars.subslice_mask & 2) != 0;

      if (mask_bit0_set) {
         ctr = info->counters + info->n_counters++;
         ctr->symbol_name = "Sampler00InputAvailable";
         ctr->name = "Slice0 DualSubslice0 Input Available";
         ctr->desc = "The percentage of time in which slice0 dualsubslice0 sampler input is available Unit: percent.";
         ctr->category = "GPU/Sampler";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = rkl__sampler_1__sampler00_input_available__read;
      }

      if (mask_bit1_set) {
         ctr = info->counters + info->n_counters++;
         ctr->symbol_name = "Sampler01InputAvailable";
         ctr->name = "Slice0 DualSubslice1 Input Available";
         ctr->desc = "The percentage of time in which slice0 dualsubslice1 sampler input is available Unit: percent.";
         ctr->category = "GPU/Sampler";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 96;
         ctr->oa_counter_read_float = rkl__sampler_1__sampler01_input_available__read;
      }

      if (mask_bit0_set) {
         ctr = info->counters + info->n_counters++;
         ctr->symbol_name = "Sampler00OutputReady";
         ctr->name = "Slice0 DualSubslice0 Sampler Output Ready";
         ctr->desc = "The percentage of time in which slice0 dualsubslice0 sampler output is ready Unit: percent.";
         ctr->category = "GPU/Sampler";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 100;
         ctr->oa_counter_read_float = rkl__sampler_1__sampler00_output_ready__read;
      }

      if (mask_bit1_set) {
         ctr = info->counters + info->n_counters++;
         ctr->symbol_name = "Sampler01OutputReady";
         ctr->name = "Slice0 DualSubslice1 Sampler Output Ready";
         ctr->desc = "The percentage of time in which slice0 dualsubslice1 sampler output is ready Unit: percent.";
         ctr->category = "GPU/Sampler";
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->raw_max = 100.0;
         ctr->offset = 104;
         ctr->oa_counter_read_float = rkl__sampler_1__sampler01_output_ready__read;
      }

      /* Result size = end of the last counter that was registered. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


static void
rkl_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "8706c725-7753-427d-ae2d-ba25c1168f2a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 27);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x24110340 },
         { .reg = 0x00009888, .val = 0x2611001C },
         { .reg = 0x00009888, .val = 0x24310340 },
         { .reg = 0x00009888, .val = 0x2631001C },
         { .reg = 0x00009888, .val = 0x001100F3 },
         { .reg = 0x00009888, .val = 0x0E1100F2 },
         { .reg = 0x00009888, .val = 0x16110103 },
         { .reg = 0x00009888, .val = 0x06110107 },
         { .reg = 0x00009888, .val = 0x08110106 },
         { .reg = 0x00009888, .val = 0x0A110105 },
         { .reg = 0x00009888, .val = 0x0C110104 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x02110000 },
         { .reg = 0x00009888, .val = 0x04110000 },
         { .reg = 0x00009888, .val = 0x00128000 },
         { .reg = 0x00009888, .val = 0x0E128000 },
         { .reg = 0x00009888, .val = 0x16128000 },
         { .reg = 0x00009888, .val = 0x06128000 },
         { .reg = 0x00009888, .val = 0x08128000 },
         { .reg = 0x00009888, .val = 0x0A128000 },
         { .reg = 0x00009888, .val = 0x0C128000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C132300 },
         { .reg = 0x00009888, .val = 0x16138000 },
         { .reg = 0x00009888, .val = 0x18138000 },
         { .reg = 0x00009888, .val = 0x1A138000 },
         { .reg = 0x00009888, .val = 0x103100F3 },
         { .reg = 0x00009888, .val = 0x123100F2 },
         { .reg = 0x00009888, .val = 0x14310103 },
         { .reg = 0x00009888, .val = 0x1C310107 },
         { .reg = 0x00009888, .val = 0x1E310106 },
         { .reg = 0x00009888, .val = 0x02310105 },
         { .reg = 0x00009888, .val = 0x04310104 },
         { .reg = 0x00009888, .val = 0x08310000 },
         { .reg = 0x00009888, .val = 0x0A310000 },
         { .reg = 0x00009888, .val = 0x0E310000 },
         { .reg = 0x00009888, .val = 0x00310000 },
         { .reg = 0x00009888, .val = 0x10328000 },
         { .reg = 0x00009888, .val = 0x12328000 },
         { .reg = 0x00009888, .val = 0x14328000 },
         { .reg = 0x00009888, .val = 0x1C328000 },
         { .reg = 0x00009888, .val = 0x1E328000 },
         { .reg = 0x00009888, .val = 0x02328000 },
         { .reg = 0x00009888, .val = 0x04328000 },
         { .reg = 0x00009888, .val = 0x1C331C00 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x12338000 },
         { .reg = 0x00009888, .val = 0x14338000 },
         { .reg = 0x00009888, .val = 0x22050C70 },
         { .reg = 0x00009888, .val = 0x12058000 },
         { .reg = 0x00009888, .val = 0x14058000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B8000 },
         { .reg = 0x00009888, .val = 0x120B8000 },
         { .reg = 0x00009888, .val = 0x140B8000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x1C0B8000 },
         { .reg = 0x00009888, .val = 0x1E0B8000 },
         { .reg = 0x00009888, .val = 0x020B8000 },
         { .reg = 0x00009888, .val = 0x040B8000 },
         { .reg = 0x00009888, .val = 0x060B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009888, .val = 0x100C8000 },
         { .reg = 0x00009888, .val = 0x1E0C0023 },
         { .reg = 0x00009888, .val = 0x160C8000 },
         { .reg = 0x00009888, .val = 0x180C8000 },
         { .reg = 0x00009888, .val = 0x1A0C8000 },
         { .reg = 0x00009888, .val = 0x040D4000 },
         { .reg = 0x00009888, .val = 0x0A0DC000 },
         { .reg = 0x00009888, .val = 0x0E0D8000 },
         { .reg = 0x00009888, .val = 0x060D8000 },
         { .reg = 0x00009888, .val = 0x080DC000 },
         { .reg = 0x00009888, .val = 0x000E8000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x160E8000 },
         { .reg = 0x00009888, .val = 0x060E8000 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x0C0E8000 },
         { .reg = 0x00009888, .val = 0x000F4000 },
         { .reg = 0x00009888, .val = 0x060FC000 },
         { .reg = 0x00009888, .val = 0x0A0F8000 },
         { .reg = 0x00009888, .val = 0x020F8000 },
         { .reg = 0x00009888, .val = 0x040FC000 },
         { .reg = 0x00009888, .val = 0x0C0AC000 },
         { .reg = 0x00009888, .val = 0x0E0A4000 },
         { .reg = 0x00009888, .val = 0x120AC000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A4000 },
         { .reg = 0x00009888, .val = 0x16085015 },
         { .reg = 0x00009888, .val = 0x02084000 },
         { .reg = 0x00009888, .val = 0x04084000 },
         { .reg = 0x00009888, .val = 0x1409C180 },
         { .reg = 0x00009888, .val = 0x16090031 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101415 },
         { .reg = 0x00009888, .val = 0x4B112000 },
         { .reg = 0x00009888, .val = 0x5F115555 },
         { .reg = 0x00009888, .val = 0x61111415 },
         { .reg = 0x00009888, .val = 0x01128000 },
         { .reg = 0x00009888, .val = 0x0F128000 },
         { .reg = 0x00009888, .val = 0x11128000 },
         { .reg = 0x00009888, .val = 0x13128000 },
         { .reg = 0x00009888, .val = 0x15128000 },
         { .reg = 0x00009888, .val = 0x17128000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x07128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000000 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000000 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000000 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000000 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFEF },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFEF },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFDF },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFDF },
         { .reg = 0x0000D960, .val = 0x00078000 },
         { .reg = 0x0000D964, .val = 0x00000FFF },
         { .reg = 0x0000DC20, .val = 0x00078000 },
         { .reg = 0x0000DC24, .val = 0x00000FFF },
         { .reg = 0x0000D968, .val = 0x00007800 },
         { .reg = 0x0000D96C, .val = 0x0000F0FF },
         { .reg = 0x0000DC28, .val = 0x00007800 },
         { .reg = 0x0000DC2C, .val = 0x0000F0FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__tdl_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header00_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header00_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header01_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header01_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header00_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader00Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = rkl__tdl_1__thread_header01_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader01Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "GpuBusyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "3eab195e-e4d8-482e-9981-811935722b21";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 21);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x040B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x140C8000 },
         { .reg = 0x00009888, .val = 0x180C8000 },
         { .reg = 0x00009888, .val = 0x1A0C8000 },
         { .reg = 0x00009888, .val = 0x060D4000 },
         { .reg = 0x00009888, .val = 0x080DC000 },
         { .reg = 0x00009888, .val = 0x040E0043 },
         { .reg = 0x00009888, .val = 0x0A0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x080F00A3 },
         { .reg = 0x00009888, .val = 0x100F0000 },
         { .reg = 0x00009888, .val = 0x020F4000 },
         { .reg = 0x00009888, .val = 0x040F8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x15102400 },
         { .reg = 0x00009888, .val = 0x230B0120 },
         { .reg = 0x00009888, .val = 0x15182400 },
         { .reg = 0x00009888, .val = 0x17100023 },
         { .reg = 0x00009888, .val = 0x11100000 },
         { .reg = 0x00009888, .val = 0x5D101000 },
         { .reg = 0x00009888, .val = 0x5B100544 },
         { .reg = 0x00009888, .val = 0x61112000 },
         { .reg = 0x00009888, .val = 0x5F110944 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x09128000 },
         { .reg = 0x00009888, .val = 0x0B128000 },
         { .reg = 0x00009888, .val = 0x1B0D0040 },
         { .reg = 0x00009888, .val = 0x0D0DC000 },
         { .reg = 0x00009888, .val = 0x1F0B00D3 },
         { .reg = 0x00009888, .val = 0x190B0000 },
         { .reg = 0x00009888, .val = 0x170B0000 },
         { .reg = 0x00009888, .val = 0x07174000 },
         { .reg = 0x00009888, .val = 0x071800A3 },
         { .reg = 0x00009888, .val = 0x11180000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x5710000A },
         { .reg = 0x00009888, .val = 0x49101000 },
         { .reg = 0x00009888, .val = 0x4B100206 },
         { .reg = 0x00009888, .val = 0x4D100800 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00024002 },
         { .reg = 0x0000D944, .val = 0x0000B7FF },
         { .reg = 0x0000DC00, .val = 0x00024002 },
         { .reg = 0x0000DC04, .val = 0x0000B7FF },
         { .reg = 0x0000D948, .val = 0x0007D000 },
         { .reg = 0x0000D94C, .val = 0x000005FF },
         { .reg = 0x0000DC08, .val = 0x0007D000 },
         { .reg = 0x0000DC0C, .val = 0x000005FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__gpu_busyness__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__gpu_busyness__any_engine_busy__read;
      counter->name = "Any Engine Busy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the "EuActivity1" OA metric set description and register it.
 *
 * Allocates a new struct intel_perf_query_info with rzalloc (using perf as
 * the ralloc context argument), fills in its identity (name, GUID, OA report
 * format), the accumulator offsets, the three register programming tables
 * (mux, B-counter, flex) and 18 counter descriptors, then inserts the query
 * into perf->oa_metrics_table keyed by its GUID string.
 *
 * perf: the perf configuration that receives the query.  It is also handed
 *       to rkl__eu_activity1__avg_gpu_core_frequency__max() to obtain the
 *       raw_max of the AvgGpuCoreFrequency counter.
 *
 * Returns nothing.  Allocation results are not checked before use.
 *
 * NOTE(review): this file is autogenerated (see the file header), so lasting
 * changes presumably belong in the generator or its input, not here.
 */
static void
rkl_register_eu_activity1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity1";
   query->symbol_name = "EuActivity1";
   query->guid = "100ae8ce-31c6-457f-97a2-dfd44812bce6";
   /* 18 must match the number of counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* These evaluate to: gpu_time 0, gpu_clock 1, a 2, b 38, c 46,
    * perfcnt 54, rpstat 56.  The strides 36 / 8 / 8 appear to correspond to
    * the A32+A4, B8 and C8 groups named in the OA format above -- TODO
    * confirm against the report layout. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor over query->counters; reassigned before every use below. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): the query is allocated in this function rather than kept
    * in a global, and data_size is not written before this test.  Presumably
    * rzalloc returns zero-filled memory, which would make the condition
    * always true here -- confirm. */
   if (!query->data_size) {
      /* Mux register writes, listed as (register, value) pairs.  Most
       * entries target 0x9888, with 0x9884 switched between 0x0 and 0x3 in
       * between; the sequence is kept exactly as generated. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register writes, again as (register, value) pairs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register writes, again as (register, value) pairs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00810710 },
         { .reg = 0x0000E558, .val = 0x00A10910 },
         { .reg = 0x0000E658, .val = 0x00850750 },
         { .reg = 0x0000E758, .val = 0x00A50950 },
         { .reg = 0x0000E45C, .val = 0x00802702 },
         { .reg = 0x0000E55C, .val = 0x00A02902 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors.  Each stanza claims the next slot in
       * query->counters (bumping n_counters), attaches the read callback
       * and metadata, and records the counter's offset.  In the offsets
       * below, UINT64 counters are spaced 8 apart and FLOAT counters 4
       * apart. */

      /* Counter 0: GpuTime, uint64, offset 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      /* Counter 1: GpuCoreClocks, uint64, offset 8. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Counter 2: AvgGpuCoreFrequency, uint64, offset 16.  The only counter
       * in this set whose raw_max comes from a helper call instead of a
       * literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Counter 3: GpuBusy, float percentage, offset 24.  The following
       * uint64 counter starts at 32, so bytes 28..31 are left unused
       * (presumably to keep uint64 values 8-byte aligned). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Counters 4..9: per-shader-stage thread dispatch counts (VS, HS, DS,
       * GS, FS/PS, CS), all uint64 events at offsets 32..72. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* Display name says "FS"/fragment while symbol_name is "PsThreads". */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Counters 10..12: engine busy percentages, floats at offsets 80, 84
       * and 88. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      /* NOTE(review): desc below says "render command streamer" although
       * this counter is "Compute Ring Busy" / ComputeBusy; the text is
       * identical to the RenderBusy desc above and looks like a copy-paste
       * slip in the generator's input -- confirm and fix there. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      /* NOTE(review): desc below has no full stop before "Unit:", unlike the
       * other descriptions in this query.  The next uint64 counter starts at
       * 96, so bytes 92..95 are left unused. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* Counters 13..14: GTI read/write throughput in bytes, uint64 at
       * offsets 96 and 104. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity1__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      /* Counters 15..17: EU pipe activity percentages, floats at offsets
       * 112, 116 and 120. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__vs_fpu_active__read;
      counter->name = "VS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      /* Category here is "EU Array/Pixel Shader", whereas PsThreads above
       * uses "EU Array/Fragment Shader". */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__ps_fpu_active__read;
      counter->name = "PS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpuActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity1__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* counter still points at the last descriptor (offset 120), so the
       * total result size is that offset plus whatever size
       * intel_perf_query_counter_get_size() reports for it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_eu_activity2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity2";
   query->symbol_name = "EuActivity2";
   query->guid = "7e926c0a-471b-40b3-a641-75f3d73339ec";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00862762 },
         { .reg = 0x0000E558, .val = 0x00A62962 },
         { .reg = 0x0000E658, .val = 0x00860760 },
         { .reg = 0x0000E758, .val = 0x00A60960 },
         { .reg = 0x0000E45C, .val = 0x00861761 },
         { .reg = 0x0000E55C, .val = 0x00A61961 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity2__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__cs_em_active__read;
      counter->name = "CS EM Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsEmActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__cs_fpu_active__read;
      counter->name = "CS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity2__cs_send_active__read;
      counter->name = "CS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsSendActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_eu_activity3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity3";
   query->symbol_name = "EuActivity3";
   query->guid = "aca9005f-fafe-4d02-a9de-b2584ff76f04";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00811711 },
         { .reg = 0x0000E558, .val = 0x00A11911 },
         { .reg = 0x0000E658, .val = 0x00851751 },
         { .reg = 0x0000E758, .val = 0x00A51951 },
         { .reg = 0x0000E45C, .val = 0x00852752 },
         { .reg = 0x0000E55C, .val = 0x00A52952 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity3__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__vs_em_active__read;
      counter->name = "VS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsEmActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__ps_em_active__read;
      counter->name = "PS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsEmActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_eu_activity4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity4";
   query->symbol_name = "EuActivity4";
   query->guid = "ad96806d-7c0d-49f0-b844-c5010ade30af";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00820720 },
         { .reg = 0x0000E558, .val = 0x00A20920 },
         { .reg = 0x0000E658, .val = 0x00830730 },
         { .reg = 0x0000E758, .val = 0x00A30930 },
         { .reg = 0x0000E45C, .val = 0x00812712 },
         { .reg = 0x0000E55C, .val = 0x00A12912 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity4__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__hs_fpu_active__read;
      counter->name = "HS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsFpuActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__ds_fpu_active__read;
      counter->name = "DS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsFpuActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_eu_activity5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity5";
   query->symbol_name = "EuActivity5";
   query->guid = "1669e79f-976e-47ba-a553-0a716ebd3c71";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00821721 },
         { .reg = 0x0000E558, .val = 0x00A21921 },
         { .reg = 0x0000E658, .val = 0x00831731 },
         { .reg = 0x0000E758, .val = 0x00A31931 },
         { .reg = 0x0000E45C, .val = 0x00822722 },
         { .reg = 0x0000E55C, .val = 0x00A22922 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity5__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__hs_em_active__read;
      counter->name = "HS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a hull shader instructions. Unit: percent.";
      counter->symbol_name = "HsEmActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__ds_em_active__read;
      counter->name = "DS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsEmActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity5__hs_send_active__read;
      counter->name = "HS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsSendActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the "EuActivity6" OA metric set with \p perf.
 *
 * Allocates a query description as a child of \p perf, fills in its
 * register programming tables (mux, B-counter and flex) together with 18
 * counter descriptors, and finally inserts the query into
 * perf->oa_metrics_table keyed by the query's GUID string.
 *
 * If either allocation fails the function returns early without
 * registering anything, rather than dereferencing a NULL pointer.
 *
 * \param perf  performance configuration that owns the new query and the
 *              metrics table it is inserted into.
 */
static void
rkl_register_eu_activity6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Nothing can be described or registered without the query itself. */
   if (!query) {
      return;
   }

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity6";
   query->symbol_name = "EuActivity6";
   query->guid = "d100e474-1e6c-4100-812b-71ec9cfe3523";
   /* Room for exactly the 18 counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);

   /* Every counter below is written through query->counters, so stop here
    * if the array could not be allocated. The query is simply left
    * unregistered; it remains a child allocation of perf.
    * NOTE(review): presumably it is released together with perf - confirm
    * against the allocator's ownership rules. */
   if (!query->counters) {
      return;
   }

   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: each section starts right after the
    * previous one (time: 1 slot, clock: 1, A: 36, B: 8, C: 8, perfcnt: 2). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The query definition is assumed not to vary between contexts, so it
    * only needs to be described once.
    * NOTE(review): query is freshly allocated above, so data_size looks to
    * be always zero here (assuming rzalloc() zero-fills) and this branch is
    * always taken; the guard is kept as emitted by the generator. */

   if (!query->data_size) {
      /* Mux programming: register/value pairs, stored by pointer in the
       * query config (the table has static storage duration). */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming for this metric set. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming for this metric set. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00840740 },
         { .reg = 0x0000E558, .val = 0x00A40940 },
         { .reg = 0x0000E658, .val = 0x00841741 },
         { .reg = 0x0000E758, .val = 0x00A41941 },
         { .reg = 0x0000E45C, .val = 0x00842742 },
         { .reg = 0x0000E55C, .val = 0x00A42942 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Each one claims the next free slot in
       * query->counters and records the byte offset of its value in the
       * query result data. */

      /* --- General GPU time / clock / busy counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter here whose maximum is computed from perf. */
      counter->raw_max = rkl__eu_activity6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* --- Per-shader-stage thread dispatch counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* --- Ring busy percentages (float values, 4 bytes apart) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      /* NOTE(review): the description below mentions the *render* command
       * streamer although this is the ComputeBusy counter; it looks like a
       * copy of the RenderBusy text. Left untouched here because the string
       * is user-visible - fix in the generator input if confirmed. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* --- GTI throughput (uint64 values; offsets resume at 96) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity6__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      /* --- Geometry shader EU pipe activity (specific to this set) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__gs_fpu_active__read;
      counter->name = "GS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsFpuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__gs_em_active__read;
      counter->name = "GS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsEmActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity6__gs_send_active__read;
      counter->name = "GS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a geometry shader instruction. Unit: percent.";
      counter->symbol_name = "GsSendActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* Total result size: end of the last counter registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the metric set under its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the "EuActivity7" OA metric set with \p perf.
 *
 * Allocates a query description as a child of \p perf, fills in its
 * register programming tables (mux, B-counter and flex) together with 19
 * counter descriptors, and finally inserts the query into
 * perf->oa_metrics_table keyed by the query's GUID string.
 *
 * If either allocation fails the function returns early without
 * registering anything, rather than dereferencing a NULL pointer.
 *
 * \param perf  performance configuration that owns the new query and the
 *              metrics table it is inserted into.
 */
static void
rkl_register_eu_activity7_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Nothing can be described or registered without the query itself. */
   if (!query) {
      return;
   }

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity7";
   query->symbol_name = "EuActivity7";
   query->guid = "b22d38f6-a056-496a-8d7d-dede47051806";
   /* Room for exactly the 19 counters appended below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);

   /* Every counter below is written through query->counters, so stop here
    * if the array could not be allocated. The query is simply left
    * unregistered; it remains a child allocation of perf.
    * NOTE(review): presumably it is released together with perf - confirm
    * against the allocator's ownership rules. */
   if (!query->counters) {
      return;
   }

   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: each section starts right after the
    * previous one (time: 1 slot, clock: 1, A: 36, B: 8, C: 8, perfcnt: 2). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The query definition is assumed not to vary between contexts, so it
    * only needs to be described once.
    * NOTE(review): query is freshly allocated above, so data_size looks to
    * be always zero here (assuming rzalloc() zero-fills) and this branch is
    * always taken; the guard is kept as emitted by the generator. */

   if (!query->data_size) {
      /* Mux programming: register/value pairs, stored by pointer in the
       * query config (the table has static storage duration). */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming for this metric set. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming for this metric set. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00803703 },
         { .reg = 0x0000E558, .val = 0x00A03903 },
         { .reg = 0x0000E658, .val = 0x00800700 },
         { .reg = 0x0000E758, .val = 0x00A00900 },
         { .reg = 0x0000E45C, .val = 0x00801701 },
         { .reg = 0x0000E55C, .val = 0x00A01901 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors. Each one claims the next free slot in
       * query->counters and records the byte offset of its value in the
       * query result data. */

      /* --- General GPU time / clock / busy counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter here whose maximum is computed from perf. */
      counter->raw_max = rkl__eu_activity7__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* --- Per-shader-stage thread dispatch counters --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* --- EU pipe activity (specific to this set; float values) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__fpu_active__read;
      counter->name = "EU FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "FpuActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__em_active__read;
      counter->name = "EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__eu_fpu_em_active__read;
      counter->name = "EU FPU And EM Pipes Active";
      counter->desc = "The percentage of time in which EU FPU and EM pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuEmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* Unlike the percentages around it, this one is a plain number with
       * a maximum of 2.0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      /* --- Ring busy percentages (float values, 4 bytes apart) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      /* NOTE(review): the description below mentions the *render* command
       * streamer although this is the ComputeBusy counter; it looks like a
       * copy of the RenderBusy text. Left untouched here because the string
       * is user-visible - fix in the generator input if confirmed. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity7__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      /* --- GTI throughput (uint64 values; offsets resume at 112) --- */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity7__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      /* Total result size: end of the last counter registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the metric set under its GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
rkl_register_eu_activity8_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity8";
   query->symbol_name = "EuActivity8";
   query->guid = "c68abf37-fe6e-45f6-9dcb-549b3fbef699";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 16);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E2400 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x1E0C0300 },
         { .reg = 0x00009888, .val = 0x120DC000 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x200E0000 },
         { .reg = 0x00009888, .val = 0x0E0FC000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x61111400 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B101212 },
         { .reg = 0x00009888, .val = 0x4D100212 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00832732 },
         { .reg = 0x0000E558, .val = 0x00A32932 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = rkl__eu_activity8__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity8__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity8__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity8__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity8__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = rkl__eu_activity8__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = rkl__eu_activity8__ds_send_active__read;
      counter->name = "DS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a domain shader instruction. Unit: percent.";
      counter->symbol_name = "DsSendActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the "TestOa" OA metric set.
 *
 * A query description is allocated as a ralloc child of \p perf, populated
 * with its register programming tables and 13 counter descriptions, and
 * finally inserted into perf->oa_metrics_table under its GUID string.
 */
static void
rkl_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   /* Register/value pairs published through config.mux_regs. */
   static const struct intel_perf_query_register_prog test_oa_mux[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x040F0000 },
      { .reg = 0x00009888, .val = 0x1E0B4000 },
      { .reg = 0x00009888, .val = 0x1E0C0200 },
      { .reg = 0x00009888, .val = 0x120D8000 },
      { .reg = 0x00009888, .val = 0x1E0F0017 },
      { .reg = 0x00009888, .val = 0x100F0000 },
      { .reg = 0x00009888, .val = 0x0E0F0000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x49110000 },
      { .reg = 0x00009888, .val = 0x5D101400 },
      { .reg = 0x00009888, .val = 0x1D1103A3 },
      { .reg = 0x00009888, .val = 0x01110000 },
      { .reg = 0x00009888, .val = 0x61111000 },
      { .reg = 0x00009888, .val = 0x1F128000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x55100110 },
      { .reg = 0x00009888, .val = 0x57100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   /* Register/value pairs published through config.b_counter_regs. */
   static const struct intel_perf_query_register_prog test_oa_b_counters[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0xF0800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0xF0800000 },
      { .reg = 0x0000DC40, .val = 0x00FF0000 },
      { .reg = 0x0000D940, .val = 0x00000004 },
      { .reg = 0x0000D944, .val = 0x0000FFFF },
      { .reg = 0x0000DC00, .val = 0x00000004 },
      { .reg = 0x0000DC04, .val = 0x0000FFFF },
      { .reg = 0x0000D948, .val = 0x00000003 },
      { .reg = 0x0000D94C, .val = 0x0000FFFF },
      { .reg = 0x0000DC08, .val = 0x00000003 },
      { .reg = 0x0000DC0C, .val = 0x0000FFFF },
      { .reg = 0x0000D950, .val = 0x00000007 },
      { .reg = 0x0000D954, .val = 0x0000FFFF },
      { .reg = 0x0000DC10, .val = 0x00000007 },
      { .reg = 0x0000DC14, .val = 0x0000FFFF },
      { .reg = 0x0000D958, .val = 0x00100002 },
      { .reg = 0x0000D95C, .val = 0x0000FFF7 },
      { .reg = 0x0000DC18, .val = 0x00100002 },
      { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
      { .reg = 0x0000D960, .val = 0x00100002 },
      { .reg = 0x0000D964, .val = 0x0000FFCF },
      { .reg = 0x0000DC20, .val = 0x00100002 },
      { .reg = 0x0000DC24, .val = 0x0000FFCF },
      { .reg = 0x0000D968, .val = 0x00100082 },
      { .reg = 0x0000D96C, .val = 0x0000FFEF },
      { .reg = 0x0000DC28, .val = 0x00100082 },
      { .reg = 0x0000DC2C, .val = 0x0000FFEF },
      { .reg = 0x0000D970, .val = 0x001000C2 },
      { .reg = 0x0000D974, .val = 0x0000FFE7 },
      { .reg = 0x0000DC30, .val = 0x001000C2 },
      { .reg = 0x0000DC34, .val = 0x0000FFE7 },
      { .reg = 0x0000D978, .val = 0x00100001 },
      { .reg = 0x0000D97C, .val = 0x0000FFE7 },
      { .reg = 0x0000DC38, .val = 0x00100001 },
      { .reg = 0x0000DC3C, .val = 0x0000FFE7 },
   };

   /* Register/value pairs published through config.flex_regs. */
   static const struct intel_perf_query_register_prog test_oa_flex[] = {
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "Metric set TestOa";
   info->symbol_name = "TestOa";
   info->guid = "74dbc739-d871-41b5-be7b-fb547ac5bec9";

   /* Room for exactly the 13 counters appended further down. */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 13);
   info->n_counters = 0;

   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: every section begins where the previous
    * one ends. */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* The description is only filled in while data_size is still zero. */
   if (info->data_size == 0) {
      info->config.mux_regs = test_oa_mux;
      info->config.n_mux_regs = ARRAY_SIZE(test_oa_mux);

      info->config.b_counter_regs = test_oa_b_counters;
      info->config.n_b_counter_regs = ARRAY_SIZE(test_oa_b_counters);

      info->config.flex_regs = test_oa_flex;
      info->config.n_flex_regs = ARRAY_SIZE(test_oa_flex);

      /* GpuTime */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuTime";
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__gpu_time__read;
      ctr->offset = 0;

      /* GpuCoreClocks */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "GpuCoreClocks";
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__gpu_core_clocks__read;
      ctr->offset = 8;

      /* AvgGpuCoreFrequency: its maximum is computed from perf. */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = rkl__test_oa__avg_gpu_core_frequency__max(perf);
      ctr->oa_counter_read_uint64 = rkl__test_oa__avg_gpu_core_frequency__read;
      ctr->offset = 16;

      /* Counter0 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter0";
      ctr->name = "TestCounter0";
      ctr->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter0__read;
      ctr->offset = 24;

      /* Counter1 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter1";
      ctr->name = "TestCounter1";
      ctr->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter1__read;
      ctr->offset = 32;

      /* Counter2 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter2";
      ctr->name = "TestCounter2";
      ctr->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter2__read;
      ctr->offset = 40;

      /* Counter3 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter3";
      ctr->name = "TestCounter3";
      ctr->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter3__read;
      ctr->offset = 48;

      /* Counter4 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter4";
      ctr->name = "TestCounter4";
      ctr->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter4__read;
      ctr->offset = 56;

      /* Counter5 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter5";
      ctr->name = "TestCounter5";
      ctr->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter5__read;
      ctr->offset = 64;

      /* Counter6 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter6";
      ctr->name = "TestCounter6";
      ctr->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter6__read;
      ctr->offset = 72;

      /* Counter7 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter7";
      ctr->name = "TestCounter7";
      ctr->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter7__read;
      ctr->offset = 80;

      /* Counter8 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter8";
      ctr->name = "TestCounter8";
      ctr->desc = "HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter8__read;
      ctr->offset = 88;

      /* Counter9 */
      ctr = &info->counters[info->n_counters++];
      ctr->symbol_name = "Counter9";
      ctr->name = "TestCounter9 - OAR enable";
      ctr->desc = "HW test counter 9. Should be equal to 1 in query. Unit: events.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      ctr->raw_max = 0 /* undefined */;
      ctr->oa_counter_read_uint64 = rkl__test_oa__counter9__read;
      ctr->offset = 96;

      /* ctr is the last counter appended; the total size is where it ends. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}

void
intel_oa_register_queries_rkl(struct intel_perf_config *perf)
{
   rkl_register_render_basic_counter_query(perf);
   rkl_register_compute_basic_counter_query(perf);
   rkl_register_render_pipe_profile_counter_query(perf);
   rkl_register_hdc_and_sf_counter_query(perf);
   rkl_register_rasterizer_and_pixel_backend_counter_query(perf);
   rkl_register_l3_1_counter_query(perf);
   rkl_register_l3_2_counter_query(perf);
   rkl_register_l3_3_counter_query(perf);
   rkl_register_l3_4_counter_query(perf);
   rkl_register_l3_5_counter_query(perf);
   rkl_register_l3_6_counter_query(perf);
   rkl_register_sampler_1_counter_query(perf);
   rkl_register_tdl_1_counter_query(perf);
   rkl_register_gpu_busyness_counter_query(perf);
   rkl_register_eu_activity1_counter_query(perf);
   rkl_register_eu_activity2_counter_query(perf);
   rkl_register_eu_activity3_counter_query(perf);
   rkl_register_eu_activity4_counter_query(perf);
   rkl_register_eu_activity5_counter_query(perf);
   rkl_register_eu_activity6_counter_query(perf);
   rkl_register_eu_activity7_counter_query(perf);
   rkl_register_eu_activity8_counter_query(perf);
   rkl_register_test_oa_counter_query(perf);
}


static void
dg1_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "1caf6b6d-a1ef-40d3-9033-311e482b826e";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 34);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14150001 },
         { .reg = 0x00009888, .val = 0x16150020 },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x10124000 },
         { .reg = 0x00009888, .val = 0x12124000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C130E00 },
         { .reg = 0x00009888, .val = 0x00150050 },
         { .reg = 0x00009888, .val = 0x06157000 },
         { .reg = 0x00009888, .val = 0x08157151 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x18150000 },
         { .reg = 0x00009888, .val = 0x1C150000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x1C054000 },
         { .reg = 0x00009888, .val = 0x24050015 },
         { .reg = 0x00009888, .val = 0x10088000 },
         { .reg = 0x00009888, .val = 0x1C08000E },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D47 },
         { .reg = 0x00009888, .val = 0x09151536 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B1050BB },
         { .reg = 0x00009888, .val = 0x5D102C01 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11A000 },
         { .reg = 0x00009888, .val = 0x61110002 },
         { .reg = 0x00009888, .val = 0x1F150137 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x0F168000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x1D350137 },
         { .reg = 0x00009888, .val = 0x03350147 },
         { .reg = 0x00009888, .val = 0x07350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x0F364000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101610 },
         { .reg = 0x00009888, .val = 0x57100006 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B100616 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler Slice0 Dualsubslice0 is bottleneck";
         counter->desc = "The percentage of time when sampler slice0 dualsubslice0 is bottleneck Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the DG1 "Compute Metrics Basic set" (ComputeBasic) OA query.
 *
 * Allocates a query description with rzalloc() using `perf` as the context
 * argument, fills in its identity (name, symbol name, GUID), OA report
 * format and accumulator offsets, attaches the MUX / B-counter / flex
 * register tables, appends the 30 counter descriptors below and finally
 * inserts the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are used without a
 * NULL check.
 */
static void
dg1_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic set";
   query->symbol_name = "ComputeBasic";
   query->guid = "e347a2bd-079d-4a77-b546-c3c114c55bae";
   /* Room for exactly the 30 counters appended further down; keep this
    * count in sync with the number of descriptors. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets...
    *
    * These are indices into results->accumulator[] (the read callbacks
    * index it as accumulator[query-><x>_offset + n]). With
    * gpu_time_offset = 0 they evaluate to: gpu_clock = 1, a = 2, b = 38,
    * c = 46, perfcnt = 54, rpstat = 56. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): `query` is allocated afresh above with rzalloc(), so if
    * rzalloc() zero-fills (as its name suggests -- confirm in
    * util/ralloc.h) data_size is always 0 here and this branch is always
    * taken. */

   if (!query->data_size) {
      /* MUX register programming for this metric set.
       *
       * NOTE(review): the same register address (0x9888) appears many times
       * with different values, interleaved with writes to 0x9884, so the
       * sequence is presumably order-sensitive -- do not sort or
       * de-duplicate these entries. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming for this metric set. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming for this metric set.
       *
       * NOTE(review): the RenderPipeProfile set registered after this
       * function lists only the first six of these registers; the extra
       * 0xE65C = 0xFFFFFFFF entry here comes from the generator input --
       * confirm against that input before changing it. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors.
       *
       * Each record takes the next free slot in query->counters (bumping
       * n_counters) and sets: the read callback (oa_counter_read_uint64 or
       * oa_counter_read_float, matching data_type), display name,
       * description, symbol name, category, type, data type, units, raw_max
       * and offset.
       *
       * Offsets grow by 8 after a UINT64 counter and by 4 after a FLOAT
       * counter; a UINT64 counter that follows FLOAT counters starts at the
       * next multiple of 8 (24 -> 32 and 88 -> 96 below). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time (via the generated __max helper) rather than
       * being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__compute_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__compute_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-stage hardware thread dispatch counts (VS/HS/DS/GS/FS/CS). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* Display name says "FS" while the symbol name and read callback use
       * "Ps"; both spellings are runtime strings/identifiers and are kept
       * as generated. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Three consecutive FLOAT percentage counters, packed 4 bytes apart
       * (offsets 80, 84, 88). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__compute_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__compute_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__compute_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* 3D pipe pixel/sample counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      /* Sampler counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      /* L3 / data port counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      /* GTI throughput counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__compute_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      /* `counter` still points at the last descriptor appended above, so
       * the total result size is that counter's offset (224) plus whatever
       * intel_perf_query_counter_get_size() reports for it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "3b27f553-aebc-45ec-aa2f-926d81a91fd9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x00123E00 },
         { .reg = 0x00009888, .val = 0x10087C00 },
         { .reg = 0x00009888, .val = 0x060C40B3 },
         { .reg = 0x00009888, .val = 0x140C3C00 },
         { .reg = 0x00009888, .val = 0x1C0C0000 },
         { .reg = 0x00009888, .val = 0x120D0320 },
         { .reg = 0x00009888, .val = 0x040E3E00 },
         { .reg = 0x00009888, .val = 0x000E0000 },
         { .reg = 0x00009888, .val = 0x280E0000 },
         { .reg = 0x00009888, .val = 0x2C0F001F },
         { .reg = 0x00009888, .val = 0x1E120002 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x1E130002 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2405FFDF },
         { .reg = 0x00009888, .val = 0x26050001 },
         { .reg = 0x00009888, .val = 0x2205FFF0 },
         { .reg = 0x00009888, .val = 0x08081000 },
         { .reg = 0x00009888, .val = 0x1C080200 },
         { .reg = 0x00009888, .val = 0x24065155 },
         { .reg = 0x00009888, .val = 0x26060015 },
         { .reg = 0x00009888, .val = 0x22065400 },
         { .reg = 0x00009888, .val = 0x1C0901F7 },
         { .reg = 0x00009888, .val = 0x14098000 },
         { .reg = 0x00009888, .val = 0x1A09F000 },
         { .reg = 0x00009888, .val = 0x0E0C0031 },
         { .reg = 0x00009888, .val = 0x180C0092 },
         { .reg = 0x00009888, .val = 0x1A0C00B1 },
         { .reg = 0x00009888, .val = 0x020C0093 },
         { .reg = 0x00009888, .val = 0x040C0033 },
         { .reg = 0x00009888, .val = 0x100C0000 },
         { .reg = 0x00009888, .val = 0x0C0C0000 },
         { .reg = 0x00009888, .val = 0x000C0000 },
         { .reg = 0x00009888, .val = 0x0A0D0022 },
         { .reg = 0x00009888, .val = 0x100D0000 },
         { .reg = 0x00009888, .val = 0x1E0DA5A8 },
         { .reg = 0x00009888, .val = 0x200D1A41 },
         { .reg = 0x00009888, .val = 0x100E0017 },
         { .reg = 0x00009888, .val = 0x160E0013 },
         { .reg = 0x00009888, .val = 0x1C0E0081 },
         { .reg = 0x00009888, .val = 0x080E0082 },
         { .reg = 0x00009888, .val = 0x0A0E0102 },
         { .reg = 0x00009888, .val = 0x140E0000 },
         { .reg = 0x00009888, .val = 0x0C0E0000 },
         { .reg = 0x00009888, .val = 0x0E0E0000 },
         { .reg = 0x00009888, .val = 0x120E0000 },
         { .reg = 0x00009888, .val = 0x0C0F0225 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x140B4000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x180B4000 },
         { .reg = 0x00009888, .val = 0x1A0B4000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x020B4000 },
         { .reg = 0x00009888, .val = 0x040B4000 },
         { .reg = 0x00009888, .val = 0x060B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x4D100002 },
         { .reg = 0x00009888, .val = 0x4F100202 },
         { .reg = 0x00009888, .val = 0x5110040A },
         { .reg = 0x00009888, .val = 0x53100202 },
         { .reg = 0x00009888, .val = 0x55100202 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100006 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100202 },
         { .reg = 0x00009888, .val = 0x4B100202 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Build the DG1 "HDCAndSF" OA metric set and register it with \p perf.
 *
 * Allocates a query (with \p perf as the ralloc context) and a 38-entry
 * counter array (with the query as the ralloc context), fills in the
 * accumulation-buffer offsets, the mux / B-counter / flex register
 * programming tables and the metadata of every counter, and finally inserts
 * the query into perf->oa_metrics_table keyed by the query GUID string.
 *
 * 32 counters are always added; the six
 * NonSamplerShader0[0-5]AccessStalledOnL3 counters are each added only when
 * the matching bit of perf->sys_vars.subslice_mask is set.
 *
 * \param perf  perf configuration that receives the query; its sys_vars and
 *              oa_metrics_table members are read/updated here.
 */
static void
dg1_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF (DG1)";
   query->symbol_name = "HDCAndSF";
   query->guid = "1fa73aed-3861-4c19-8105-e55986c8a4e5";
   /* 38 is the maximum number of counters appended below:
    * 32 unconditional + 6 gated on subslice_mask. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 38);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets...
    *
    * These are indices into the results accumulator array (the counter read
    * functions index results->accumulator[] with them). Layout, in order:
    * 1 GPU time slot, 1 GPU clock slot, 36 A counters (32 + 4, matching the
    * A32u40_A4u32 part of the OA format name), 8 B counters, 8 C counters,
    * then 2 perfcnt slots before rpstat. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function the query is not a global; it is
    * allocated by rzalloc() just above. Presumably rzalloc zero-fills the
    * allocation, in which case data_size is 0 here and the guard below is
    * always taken - confirm before relying on it either way. */

   if (!query->data_size) {
      /* Register programming tables. They are static const so that the
       * pointers stored in query->config remain valid after this function
       * returns. The register/value pairs are hardware configuration data
       * emitted by the generator; do not edit them by hand. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14112600 },
         { .reg = 0x00009888, .val = 0x14312600 },
         { .reg = 0x00009888, .val = 0x14512600 },
         { .reg = 0x00009888, .val = 0x14712600 },
         { .reg = 0x00009888, .val = 0x14912600 },
         { .reg = 0x00009888, .val = 0x14B12600 },
         { .reg = 0x00009888, .val = 0x120800A0 },
         { .reg = 0x00009888, .val = 0x120900A0 },
         { .reg = 0x00009888, .val = 0x120A00A0 },
         { .reg = 0x00009888, .val = 0x240B0019 },
         { .reg = 0x00009888, .val = 0x02110077 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x04310077 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x02310000 },
         { .reg = 0x00009888, .val = 0x04328000 },
         { .reg = 0x00009888, .val = 0x14338000 },
         { .reg = 0x00009888, .val = 0x06510077 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x02510000 },
         { .reg = 0x00009888, .val = 0x06528000 },
         { .reg = 0x00009888, .val = 0x16538000 },
         { .reg = 0x00009888, .val = 0x08710077 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x04710000 },
         { .reg = 0x00009888, .val = 0x08728000 },
         { .reg = 0x00009888, .val = 0x18738000 },
         { .reg = 0x00009888, .val = 0x0A910077 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x04910000 },
         { .reg = 0x00009888, .val = 0x0A928000 },
         { .reg = 0x00009888, .val = 0x1A938000 },
         { .reg = 0x00009888, .val = 0x0CB10077 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x06B10000 },
         { .reg = 0x00009888, .val = 0x0CB28000 },
         { .reg = 0x00009888, .val = 0x1CB30100 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34000540 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A011500 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x240505FF },
         { .reg = 0x00009888, .val = 0x2205FF90 },
         { .reg = 0x00009888, .val = 0x0A082300 },
         { .reg = 0x00009888, .val = 0x0C080022 },
         { .reg = 0x00009888, .val = 0x1C080000 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x24060156 },
         { .reg = 0x00009888, .val = 0x22064000 },
         { .reg = 0x00009888, .val = 0x08092300 },
         { .reg = 0x00009888, .val = 0x0A090022 },
         { .reg = 0x00009888, .val = 0x1C090007 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1A09E000 },
         { .reg = 0x00009888, .val = 0x22078000 },
         { .reg = 0x00009888, .val = 0x060A2300 },
         { .reg = 0x00009888, .val = 0x080A0022 },
         { .reg = 0x00009888, .val = 0x100A0000 },
         { .reg = 0x00009888, .val = 0x180A0180 },
         { .reg = 0x00009888, .val = 0x1E0D5400 },
         { .reg = 0x00009888, .val = 0x200D0001 },
         { .reg = 0x00009888, .val = 0x0A0EC000 },
         { .reg = 0x00009888, .val = 0x0C0E4000 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x000B00F7 },
         { .reg = 0x00009888, .val = 0x200B0000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x060B8000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D100055 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D140007 },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x611100AA },
         { .reg = 0x00009888, .val = 0x47101310 },
         { .reg = 0x00009888, .val = 0x4D100002 },
         { .reg = 0x00009888, .val = 0x4F101808 },
         { .reg = 0x00009888, .val = 0x51101717 },
         { .reg = 0x00009888, .val = 0x53100717 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x49100311 },
         { .reg = 0x00009888, .val = 0x4B100A08 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x000F0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000030 },
         { .reg = 0x0000D94C, .val = 0x0000FFF9 },
         { .reg = 0x0000DC08, .val = 0x00000030 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF9 },
         { .reg = 0x0000D950, .val = 0x000000C0 },
         { .reg = 0x0000D954, .val = 0x0000FFE7 },
         { .reg = 0x0000DC10, .val = 0x000000C0 },
         { .reg = 0x0000DC14, .val = 0x0000FFE7 },
         { .reg = 0x0000D958, .val = 0x00000300 },
         { .reg = 0x0000D95C, .val = 0x0000FF9F },
         { .reg = 0x0000DC18, .val = 0x00000300 },
         { .reg = 0x0000DC1C, .val = 0x0000FF9F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions. Each one takes the next free slot of
       * query->counters (n_counters is post-incremented), so the order of
       * the stanzas below is the order of the counters in the array.
       *
       * counter->offset values are hard-coded: they advance by 8 after a
       * UINT64 counter and by 4 after a FLOAT counter, and a UINT64 counter
       * following a FLOAT one starts on the next multiple of 8 (e.g. GpuBusy
       * at 24 is followed by VsThreads at 32). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose raw_max is computed from perf
       * at registration time instead of being a literal. */
      counter->raw_max = dg1__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      /* Per-dualsubslice counters: each of the next six is appended only
       * when the matching bit of perf->sys_vars.subslice_mask is set
       * (bit 0 -> Dualsubslice0 ... bit 5 -> Dualsubslice5).
       *
       * Their offsets (216..236) are fixed regardless of the mask, so a
       * counter that is skipped leaves its 4-byte slot unused rather than
       * shifting the counters that follow. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 224;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3) Unit: percent.";
         counter->symbol_name = "NonSamplerShader03AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 228;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice4) Unit: percent.";
         counter->symbol_name = "NonSamplerShader04AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 232;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice5) Unit: percent.";
         counter->symbol_name = "NonSamplerShader05AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 236;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__pixel_data00_ready__read;
      counter->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData00Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__pixel_data01_ready__read;
      counter->name = "Slice0 Pipe1 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe1 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData01Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__hdc_and_sf__pixel_data02_ready__read;
      counter->name = "Slice0 Pipe2 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe2 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData02Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      /* `counter` still points at the last counter appended above
       * (PixelData02Ready, offset 252), which also has the highest offset,
       * so the total result size is that offset plus that counter's size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the fully described query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Describe the "RasterizerAndPixelBackend" OA metric set and register it.
 *
 * Allocates a query description, fills in its register programming tables
 * (mux, B-counter and flex registers) and its list of counters, then inserts
 * the query into perf->oa_metrics_table keyed by the query GUID string.
 *
 * perf: configuration the query is attached to; its sys_vars.slice_mask
 *       decides whether the two slice0 rasterizer counters are added.
 *
 * NOTE(review): neither rzalloc() nor rzalloc_array() result is checked for
 * NULL before being dereferenced -- confirm the allocator's failure behavior.
 */
static void
dg1_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "2bb016ee-a9b7-477e-8d5d-f7fd179a8f16";
   /* Room for 40 counters, which is the maximum number appended below
    * (38 unconditional + 2 that depend on slice_mask). */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 40);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* These are indices into results->accumulator[] as used by the counter
    * read functions. Each group starts right after the previous one; the
    * group sizes 36 / 8 / 8 presumably correspond to the A, B and C counters
    * of the OA format selected above -- TODO confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): the query is allocated afresh above rather than being a
    * global, so the "global variable" wording looks stale. If rzalloc()
    * zero-fills its allocation (as the name suggests -- confirm), data_size
    * is always 0 at this point and this branch is always taken. */
   if (!query->data_size) {
      /* Register/value pairs handed to query->config as the mux
       * configuration. The tables are static const, so the pointers stored
       * in query->config stay valid after this function returns. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1E055000 },
         { .reg = 0x00009888, .val = 0x1A0500C0 },
         { .reg = 0x00009888, .val = 0x1E065000 },
         { .reg = 0x00009888, .val = 0x1A0600C0 },
         { .reg = 0x00009888, .val = 0x1E075000 },
         { .reg = 0x00009888, .val = 0x1A0700C0 },
         { .reg = 0x00009888, .val = 0x2A0B7300 },
         { .reg = 0x00009888, .val = 0x2C0B0000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x34005500 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x2A015400 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x0C05C000 },
         { .reg = 0x00009888, .val = 0x00052700 },
         { .reg = 0x00009888, .val = 0x10050000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x2405CF0F },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500C0 },
         { .reg = 0x00009888, .val = 0x0C0600D0 },
         { .reg = 0x00009888, .val = 0x0E062700 },
         { .reg = 0x00009888, .val = 0x10060000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x24064140 },
         { .reg = 0x00009888, .val = 0x26060010 },
         { .reg = 0x00009888, .val = 0x22061000 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C090366 },
         { .reg = 0x00009888, .val = 0x1A091000 },
         { .reg = 0x00009888, .val = 0x0A07C000 },
         { .reg = 0x00009888, .val = 0x0E070027 },
         { .reg = 0x00009888, .val = 0x10070000 },
         { .reg = 0x00009888, .val = 0x24070000 },
         { .reg = 0x00009888, .val = 0x1A0A0104 },
         { .reg = 0x00009888, .val = 0x200D1040 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x120E4000 },
         { .reg = 0x00009888, .val = 0x000B0144 },
         { .reg = 0x00009888, .val = 0x0E0B0145 },
         { .reg = 0x00009888, .val = 0x100B0156 },
         { .reg = 0x00009888, .val = 0x040B014F },
         { .reg = 0x00009888, .val = 0x200B0000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x19151400 },
         { .reg = 0x00009888, .val = 0x21152800 },
         { .reg = 0x00009888, .val = 0x19351400 },
         { .reg = 0x00009888, .val = 0x21352800 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105FA5 },
         { .reg = 0x00009888, .val = 0x5D101550 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B146000 },
         { .reg = 0x00009888, .val = 0x1D14003E },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11A00A },
         { .reg = 0x00009888, .val = 0x61112AA0 },
         { .reg = 0x00009888, .val = 0x07150016 },
         { .reg = 0x00009888, .val = 0x09150096 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03168000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x0B350016 },
         { .reg = 0x00009888, .val = 0x0D350096 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x07364000 },
         { .reg = 0x00009888, .val = 0x47101800 },
         { .reg = 0x00009888, .val = 0x4D100131 },
         { .reg = 0x00009888, .val = 0x4F100808 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53101A00 },
         { .reg = 0x00009888, .val = 0x55100003 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x5710000A },
         { .reg = 0x00009888, .val = 0x49103803 },
         { .reg = 0x00009888, .val = 0x4B103131 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs handed to query->config as the B-counter
       * configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00000038 },
         { .reg = 0x0000D944, .val = 0x0000FFF8 },
         { .reg = 0x0000DC00, .val = 0x00000038 },
         { .reg = 0x0000DC04, .val = 0x0000FFF8 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs handed to query->config as the flex register
       * configuration. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter list. Every entry follows the same pattern: claim the next
       * free slot in query->counters (bumping n_counters), then fill in the
       * read callback (uint64 or float variant, matching data_type), the
       * user-visible strings, the type/units metadata, raw_max and the
       * offset of the value. Offsets are hard-coded and increase
       * monotonically; UINT64 counters all sit on multiples of 8. */

      /* General GPU counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed by a helper
       * call rather than being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-shader-stage thread dispatch counts. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EU array utilisation percentages (FLOAT, offsets 4 apart). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* 3D pipe pixel/sample event counts. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      /* Sampler, SLM, L3 data port and barrier counters. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      /* The two slice0 rasterizer counters are only added when bit 0 of
       * perf->sys_vars.slice_mask is set. The counters that follow keep
       * their fixed offsets (224 onwards) either way, so skipping these two
       * leaves offsets 216..223 unused rather than shifting later entries. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      /* Per-pipe PS output / pixel values percentages for slice0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__ps_output00_available__read;
      counter->name = "Slice0 Pipe0 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput00Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__ps_output01_available__read;
      counter->name = "Slice0 Pipe1 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe1 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput01Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__ps_output02_available__read;
      counter->name = "Slice0 Pipe2 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe2 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput02Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__pixel_values00_ready__read;
      counter->name = "Slice0 Pipe0 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues00Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__pixel_values01_ready__read;
      counter->name = "Slice0 Pipe1 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe1 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues01Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__pixel_values02_ready__read;
      counter->name = "Slice0 Pipe2 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe2 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues02Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      /* GTI request queue "full" percentages. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__gt_request_queue00_full__read;
      counter->name = "SQ00 is full";
      counter->desc = "The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue00Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__gt_request_queue01_full__read;
      counter->name = "SQ01 is full";
      counter->desc = "The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue01Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__gt_request_queue10_full__read;
      counter->name = "SQ10 is full";
      counter->desc = "The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue10Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__rasterizer_and_pixel_backend__gt_request_queue11_full__read;
      counter->name = "SQ11 is full";
      counter->desc = "The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue11Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      /* `counter` still points at the last entry appended above, which has
       * the highest offset, so the total size is that offset plus the size
       * reported for that counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the finished description, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the DG1 "L3_1" OA metric set with perf.
 *
 * Allocates a query description (using perf as the ralloc context), fills
 * in the OA report format, the accumulation buffer offsets, the MUX /
 * B-counter / flex register programming tables and the list of exposed
 * counters, then inserts the query into perf->oa_metrics_table keyed by
 * its GUID string.
 *
 * 13 counters are always registered. Four more "Slice0 L3 BankN Input
 * Available" counters (banks 0, 1, 4 and 5) are registered only when bit 0
 * of perf->sys_vars.slice_mask is set, for a maximum of 17, which is the
 * number of entries allocated for query->counters.
 *
 * perf: configuration the query is allocated against and whose
 *       oa_metrics_table receives the new entry.
 */
static void
dg1_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_1";
   query->symbol_name = "L3_1";
   query->guid = "cdccda4b-c7c9-41a9-965f-fc2adf8cff0b";
   /* 17 = 13 unconditional counters + 4 counters gated on slice 0 below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: slot indices of the kind the counter read
    * functions use to index results->accumulator[]. GPU time and GPU clock
    * take one slot each, followed by 36 A, 8 B, 8 C and 2 perfcnt slots,
    * with rpstat starting after those. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Starts at the first array entry; re-pointed at each newly appended
    * counter below and read once more when computing data_size. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): query is allocated at the top of this same call rather
    * than stored in a global, so data_size is presumably still zero here
    * (assuming rzalloc() zero-fills) and this branch is expected to run on
    * every call -- confirm. */

   if (!query->data_size) {
      /* MUX programming as register offset/value pairs.
       * NOTE(review): the table interleaves writes to 0x9884 with runs of
       * writes to 0x9888, which suggests the order is significant --
       * confirm against the hardware documentation before reordering. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04022000 },
         { .reg = 0x00009888, .val = 0x06022800 },
         { .reg = 0x00009888, .val = 0x04002827 },
         { .reg = 0x00009888, .val = 0x0600202C },
         { .reg = 0x00009888, .val = 0x00020024 },
         { .reg = 0x00009888, .val = 0x0E020025 },
         { .reg = 0x00009888, .val = 0x10020026 },
         { .reg = 0x00009888, .val = 0x12020027 },
         { .reg = 0x00009888, .val = 0x1402002C },
         { .reg = 0x00009888, .val = 0x1602002D },
         { .reg = 0x00009888, .val = 0x1802002E },
         { .reg = 0x00009888, .val = 0x1A02002F },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x2C020000 },
         { .reg = 0x00009888, .val = 0x1C000024 },
         { .reg = 0x00009888, .val = 0x1E000025 },
         { .reg = 0x00009888, .val = 0x02000026 },
         { .reg = 0x00009888, .val = 0x0800002D },
         { .reg = 0x00009888, .val = 0x0A00002E },
         { .reg = 0x00009888, .val = 0x0C00002F },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34001540 },
         { .reg = 0x00009888, .val = 0x36000000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x24053FFF },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x24065540 },
         { .reg = 0x00009888, .val = 0x26060005 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C0900FE },
         { .reg = 0x00009888, .val = 0x1C074000 },
         { .reg = 0x00009888, .val = 0x24071555 },
         { .reg = 0x00009888, .val = 0x160A1000 },
         { .reg = 0x00009888, .val = 0x180A5400 },
         { .reg = 0x00009888, .val = 0x1A0A0055 },
         { .reg = 0x00009888, .val = 0x1E0D4001 },
         { .reg = 0x00009888, .val = 0x200D0555 },
         { .reg = 0x00009888, .val = 0x040E4000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x0C0EC000 },
         { .reg = 0x00009888, .val = 0x0E0EC000 },
         { .reg = 0x00009888, .val = 0x100EC000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B4000 },
         { .reg = 0x00009888, .val = 0x140B4000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x180B4000 },
         { .reg = 0x00009888, .val = 0x1A0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100919 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100000 },
         { .reg = 0x00009888, .val = 0x55101910 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100009 },
         { .reg = 0x00009888, .val = 0x49101919 },
         { .reg = 0x00009888, .val = 0x4B101919 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming as register offset/value pairs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming as register offset/value pairs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counters that are always registered (13 entries). Each one takes
       * the next free array slot via n_counters++. counter->offset places
       * the value within the query's result data; in this list it advances
       * by 8 after each UINT64 counter and by 4 after each FLOAT counter. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from perf
       * at registration time rather than being a literal. */
      counter->raw_max = dg1__l3_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      /* The four L3 bank counters below (banks 0, 1, 4, 5) are each gated
       * on the same condition: bit 0 of the slice mask. When it is clear,
       * none of them is registered and the set ends with CsThreads. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_1__l30_bank0_input_available__read;
         counter->name = "Slice0 L3 Bank0 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank0 has input available Unit: percent.";
         counter->symbol_name = "L30Bank0InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_1__l30_bank1_input_available__read;
         counter->name = "Slice0 L3 Bank1 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank1 has input available Unit: percent.";
         counter->symbol_name = "L30Bank1InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_1__l30_bank4_input_available__read;
         counter->name = "Slice0 L3 Bank4 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank4 has input available Unit: percent.";
         counter->symbol_name = "L30Bank4InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_1__l30_bank5_input_available__read;
         counter->name = "Slice0 L3 Bank5 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank5 has input available Unit: percent.";
         counter->symbol_name = "L30Bank5InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      /* counter still points at the last counter registered above (the
       * bank5 counter, or CsThreads when the slice-0 counters were
       * skipped), so the total is that counter's offset plus its size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the finished query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Registers the DG1 "L3_2" OA metric set with perf.
 *
 * Allocates a query description (using perf as the ralloc context), fills
 * in the OA report format, the accumulation buffer offsets, the MUX /
 * B-counter / flex register programming tables and the list of exposed
 * counters, then inserts the query into perf->oa_metrics_table keyed by
 * its GUID string.
 *
 * 13 counters are always registered. Four more "Slice0 L3 BankN Input
 * Available" counters (banks 2, 3, 6 and 7) are registered only when bit 0
 * of perf->sys_vars.slice_mask is set, for a maximum of 17, which is the
 * number of entries allocated for query->counters.
 *
 * perf: configuration the query is allocated against and whose
 *       oa_metrics_table receives the new entry.
 */
static void
dg1_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_2";
   query->symbol_name = "L3_2";
   query->guid = "3c44fb1b-f9b3-4d52-a507-e2b2a80ec8b7";
   /* 17 = 13 unconditional counters + 4 counters gated on slice 0 below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: slot indices of the kind the counter read
    * functions use to index results->accumulator[]. GPU time and GPU clock
    * take one slot each, followed by 36 A, 8 B, 8 C and 2 perfcnt slots,
    * with rpstat starting after those. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Starts at the first array entry; re-pointed at each newly appended
    * counter below and read once more when computing data_size. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): query is allocated at the top of this same call rather
    * than stored in a global, so data_size is presumably still zero here
    * (assuming rzalloc() zero-fills) and this branch is expected to run on
    * every call -- confirm. */

   if (!query->data_size) {
      /* MUX programming as register offset/value pairs.
       * NOTE(review): the table interleaves writes to 0x9884 with runs of
       * writes to 0x9888, which suggests the order is significant --
       * confirm against the hardware documentation before reordering. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04023027 },
         { .reg = 0x00009888, .val = 0x0602382C },
         { .reg = 0x00009888, .val = 0x04007000 },
         { .reg = 0x00009888, .val = 0x06007800 },
         { .reg = 0x00009888, .val = 0x1C020024 },
         { .reg = 0x00009888, .val = 0x1E020025 },
         { .reg = 0x00009888, .val = 0x02020026 },
         { .reg = 0x00009888, .val = 0x0802002D },
         { .reg = 0x00009888, .val = 0x0A02002E },
         { .reg = 0x00009888, .val = 0x0C02002F },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x2C020000 },
         { .reg = 0x00009888, .val = 0x00000024 },
         { .reg = 0x00009888, .val = 0x0E000025 },
         { .reg = 0x00009888, .val = 0x10000026 },
         { .reg = 0x00009888, .val = 0x12000027 },
         { .reg = 0x00009888, .val = 0x1400002C },
         { .reg = 0x00009888, .val = 0x1600002D },
         { .reg = 0x00009888, .val = 0x1800002E },
         { .reg = 0x00009888, .val = 0x1A00002F },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x2205FFF0 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x22065400 },
         { .reg = 0x00009888, .val = 0x24060015 },
         { .reg = 0x00009888, .val = 0x1C090301 },
         { .reg = 0x00009888, .val = 0x14098000 },
         { .reg = 0x00009888, .val = 0x1A09F000 },
         { .reg = 0x00009888, .val = 0x24074000 },
         { .reg = 0x00009888, .val = 0x26070001 },
         { .reg = 0x00009888, .val = 0x22075550 },
         { .reg = 0x00009888, .val = 0x1A0A0500 },
         { .reg = 0x00009888, .val = 0x160A4000 },
         { .reg = 0x00009888, .val = 0x180A0155 },
         { .reg = 0x00009888, .val = 0x200D5000 },
         { .reg = 0x00009888, .val = 0x1E0D1554 },
         { .reg = 0x00009888, .val = 0x120EC000 },
         { .reg = 0x00009888, .val = 0x040E8000 },
         { .reg = 0x00009888, .val = 0x060EC000 },
         { .reg = 0x00009888, .val = 0x080EC000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009888, .val = 0x020B4000 },
         { .reg = 0x00009888, .val = 0x040B4000 },
         { .reg = 0x00009888, .val = 0x060B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x47100910 },
         { .reg = 0x00009888, .val = 0x4D101000 },
         { .reg = 0x00009888, .val = 0x4F101919 },
         { .reg = 0x00009888, .val = 0x51101919 },
         { .reg = 0x00009888, .val = 0x53101919 },
         { .reg = 0x00009888, .val = 0x55100009 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming as register offset/value pairs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming as register offset/value pairs. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counters that are always registered (13 entries). Each one takes
       * the next free array slot via n_counters++. counter->offset places
       * the value within the query's result data. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from perf
       * at registration time rather than being a literal. */
      counter->raw_max = dg1__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      /* 32 rather than 28: four bytes are skipped after the FLOAT counter
       * at 24, presumably to keep this UINT64 value 8-byte aligned. */
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The four L3 bank counters below (banks 2, 3, 6, 7) are each gated
       * on the same condition: bit 0 of the slice mask. When it is clear,
       * none of them is registered and the set ends with
       * EuThreadOccupancy. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_2__l30_bank2_input_available__read;
         counter->name = "Slice0 L3 Bank2 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank2 has input available Unit: percent.";
         counter->symbol_name = "L30Bank2InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_2__l30_bank3_input_available__read;
         counter->name = "Slice0 L3 Bank3 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank3 has input available Unit: percent.";
         counter->symbol_name = "L30Bank3InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_2__l30_bank6_input_available__read;
         counter->name = "Slice0 L3 Bank6 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank6 has input available Unit: percent.";
         counter->symbol_name = "L30Bank6InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_2__l30_bank7_input_available__read;
         counter->name = "Slice0 L3 Bank7 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank7 has input available Unit: percent.";
         counter->symbol_name = "L30Bank7InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      /* counter still points at the last counter registered above (the
       * bank7 counter, or EuThreadOccupancy when the slice-0 counters were
       * skipped), so the total is that counter's offset plus its size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the finished query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the DG1 "L3_3" OA metric set.
 *
 * A query description is allocated with rzalloc() using `perf` as the
 * allocation context, filled in with the register programming (mux,
 * B-counter and flex tables) and the list of counters it exposes, and then
 * inserted into perf->oa_metrics_table keyed by the set's GUID string.
 */
static void
dg1_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They have static storage because
    * query->config keeps pointers to them after this function returns. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04020200 },
      { .reg = 0x00009888, .val = 0x06020020 },
      { .reg = 0x00009888, .val = 0x04000200 },
      { .reg = 0x00009888, .val = 0x06004000 },
      { .reg = 0x00009888, .val = 0x08020028 },
      { .reg = 0x00009888, .val = 0x2E020000 },
      { .reg = 0x00009888, .val = 0x0A000020 },
      { .reg = 0x00009888, .val = 0x0C000028 },
      { .reg = 0x00009888, .val = 0x3A000000 },
      { .reg = 0x00009888, .val = 0x08004000 },
      { .reg = 0x00009888, .val = 0x06014000 },
      { .reg = 0x00009888, .val = 0x08014000 },
      { .reg = 0x00009888, .val = 0x22050F00 },
      { .reg = 0x00009888, .val = 0x22064000 },
      { .reg = 0x00009888, .val = 0x24060001 },
      { .reg = 0x00009888, .val = 0x1A096000 },
      { .reg = 0x00009888, .val = 0x22070500 },
      { .reg = 0x00009888, .val = 0x180A0014 },
      { .reg = 0x00009888, .val = 0x1E0D0140 },
      { .reg = 0x00009888, .val = 0x060E8000 },
      { .reg = 0x00009888, .val = 0x080E4000 },
      { .reg = 0x00009888, .val = 0x060B4000 },
      { .reg = 0x00009888, .val = 0x080B4000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x5B100550 },
      { .reg = 0x00009888, .val = 0x17148000 },
      { .reg = 0x00009888, .val = 0x1B141C00 },
      { .reg = 0x00009888, .val = 0x5F110AA0 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B101000 },
      { .reg = 0x00009888, .val = 0x4D100919 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_3";
   query->symbol_name = "L3_3";
   query->guid = "ec911098-71bf-4432-9153-6143e3cfbe06";

   /* Room for the 13 unconditional counters plus the 2 gated ones below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: each one is derived from the previous. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The query was allocated with rzalloc() just above, so data_size is
    * presumably still zero at this point and this branch is expected to be
    * taken on every call. */
   if (!query->data_size) {
      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      query->config.flex_regs = flex_prog;
      query->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* --- GPU-wide counters --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__gpu_time__read;
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__gpu_core_clocks__read;
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__avg_gpu_core_frequency__read;
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = dg1__l3_3__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_3__gpu_busy__read;
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->symbol_name = "GpuBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* --- Per-shader-stage thread dispatch counts --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__vs_threads__read;
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "VsThreads";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__hs_threads__read;
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "HsThreads";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__ds_threads__read;
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "DsThreads";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__gs_threads__read;
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "GsThreads";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__ps_threads__read;
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "PsThreads";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_3__cs_threads__read;
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "CsThreads";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* --- EU array utilisation (float percentages, 4 bytes apart) --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_3__eu_active__read;
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->symbol_name = "EuActive";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_3__eu_stall__read;
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->symbol_name = "EuStall";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_3__eu_thread_occupancy__read;
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;

      /* --- L3 bank counters, only added when bit 0 of slice_mask is set --- */

      if (perf->sys_vars.slice_mask & 1) {
         ctr = query->counters + query->n_counters++;
         ctr->oa_counter_read_float = dg1__l3_3__l30_bank0_output_ready__read;
         ctr->name = "Slice0 L3 Bank0 Output Ready";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 output is ready Unit: percent.";
         ctr->symbol_name = "L30Bank0OutputReady";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;

         ctr = query->counters + query->n_counters++;
         ctr->oa_counter_read_float = dg1__l3_3__l30_bank4_output_ready__read;
         ctr->name = "Slice0 L3 Bank4 Output Ready";
         ctr->desc = "The percentage of time in which slice0 L3 bank4 output is ready Unit: percent.";
         ctr->symbol_name = "L30Bank4OutputReady";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 96;
      }

      /* data_size extends to the end of the last counter registered above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the DG1 "L3_4" OA metric set.
 *
 * A query description is allocated with rzalloc() using `perf` as the
 * allocation context, filled in with the register programming (mux,
 * B-counter and flex tables) and the list of counters it exposes, and then
 * inserted into perf->oa_metrics_table keyed by the set's GUID string.
 */
static void
dg1_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables. They have static storage because
    * query->config keeps pointers to them after this function returns. */
   static const struct intel_perf_query_register_prog mux_prog[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04020A00 },
      { .reg = 0x00009888, .val = 0x06020800 },
      { .reg = 0x00009888, .val = 0x04000A00 },
      { .reg = 0x00009888, .val = 0x06000820 },
      { .reg = 0x00009888, .val = 0x0A020020 },
      { .reg = 0x00009888, .val = 0x0C020028 },
      { .reg = 0x00009888, .val = 0x2E020000 },
      { .reg = 0x00009888, .val = 0x08000028 },
      { .reg = 0x00009888, .val = 0x3A000000 },
      { .reg = 0x00009888, .val = 0x0A004000 },
      { .reg = 0x00009888, .val = 0x0C004000 },
      { .reg = 0x00009888, .val = 0x0A014000 },
      { .reg = 0x00009888, .val = 0x18012000 },
      { .reg = 0x00009888, .val = 0x2205F000 },
      { .reg = 0x00009888, .val = 0x24060014 },
      { .reg = 0x00009888, .val = 0x1A098000 },
      { .reg = 0x00009888, .val = 0x1C090001 },
      { .reg = 0x00009888, .val = 0x22075000 },
      { .reg = 0x00009888, .val = 0x180A0140 },
      { .reg = 0x00009888, .val = 0x1E0D1400 },
      { .reg = 0x00009888, .val = 0x080E8000 },
      { .reg = 0x00009888, .val = 0x0A0E4000 },
      { .reg = 0x00009888, .val = 0x0A0B4000 },
      { .reg = 0x00009888, .val = 0x0C0B4000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x5B100550 },
      { .reg = 0x00009888, .val = 0x17148000 },
      { .reg = 0x00009888, .val = 0x1B141C00 },
      { .reg = 0x00009888, .val = 0x5F110AA0 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x49101000 },
      { .reg = 0x00009888, .val = 0x4B100919 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog b_counter_prog[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_prog[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_4";
   query->symbol_name = "L3_4";
   query->guid = "17d351db-7916-4eea-9817-29ca522daacd";

   /* Room for the 13 unconditional counters plus the 2 gated ones below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer offsets: each one is derived from the previous. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = query->counters;

   /* The query was allocated with rzalloc() just above, so data_size is
    * presumably still zero at this point and this branch is expected to be
    * taken on every call. */
   if (!query->data_size) {
      query->config.mux_regs = mux_prog;
      query->config.n_mux_regs = ARRAY_SIZE(mux_prog);

      query->config.b_counter_regs = b_counter_prog;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_prog);

      query->config.flex_regs = flex_prog;
      query->config.n_flex_regs = ARRAY_SIZE(flex_prog);

      /* --- GPU-wide counters --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__gpu_time__read;
      ctr->name = "GPU Time Elapsed";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->symbol_name = "GpuTime";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__gpu_core_clocks__read;
      ctr->name = "GPU Core Clocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__avg_gpu_core_frequency__read;
      ctr->name = "AVG GPU Core Frequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      ctr->raw_max = dg1__l3_4__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_4__gpu_busy__read;
      ctr->name = "GPU Busy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->symbol_name = "GpuBusy";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;

      /* --- Per-shader-stage thread dispatch counts --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__vs_threads__read;
      ctr->name = "VS Threads Dispatched";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "VsThreads";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 32;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__hs_threads__read;
      ctr->name = "HS Threads Dispatched";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "HsThreads";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__ds_threads__read;
      ctr->name = "DS Threads Dispatched";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "DsThreads";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__gs_threads__read;
      ctr->name = "GS Threads Dispatched";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "GsThreads";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__ps_threads__read;
      ctr->name = "FS Threads Dispatched";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "PsThreads";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_uint64 = dg1__l3_4__cs_threads__read;
      ctr->name = "CS Threads Dispatched";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->symbol_name = "CsThreads";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;

      /* --- EU array utilisation (float percentages, 4 bytes apart) --- */

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_4__eu_active__read;
      ctr->name = "EU Active";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->symbol_name = "EuActive";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 80;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_4__eu_stall__read;
      ctr->name = "EU Stall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->symbol_name = "EuStall";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 84;

      ctr = query->counters + query->n_counters++;
      ctr->oa_counter_read_float = dg1__l3_4__eu_thread_occupancy__read;
      ctr->name = "EU Thread Occupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 88;

      /* --- L3 bank counters, only added when bit 0 of slice_mask is set --- */

      if (perf->sys_vars.slice_mask & 1) {
         ctr = query->counters + query->n_counters++;
         ctr->oa_counter_read_float = dg1__l3_4__l30_bank1_output_ready__read;
         ctr->name = "Slice0 L3 Bank1 Output Ready";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 output is ready Unit: percent.";
         ctr->symbol_name = "L30Bank1OutputReady";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;

         ctr = query->counters + query->n_counters++;
         ctr->oa_counter_read_float = dg1__l3_4__l30_bank5_output_ready__read;
         ctr->name = "Slice0 L3 Bank5 Output Ready";
         ctr->desc = "The percentage of time in which slice0 L3 bank5 output is ready Unit: percent.";
         ctr->symbol_name = "L30Bank5OutputReady";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 96;
      }

      /* data_size extends to the end of the last counter registered above. */
      query->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the DG1 "L3_5" OA metric set and add it to perf->oa_metrics_table.
 *
 * The function allocates a query descriptor, fills in the accumulation
 * buffer layout, attaches the three static register tables (mux, B-counter,
 * flex) to query->config, registers 13 unconditional counters plus up to two
 * "Slice0 L3 BankN Output Ready" counters (only when bit 0 of
 * perf->sys_vars.slice_mask is set), derives query->data_size from the last
 * registered counter, and finally inserts the query into the hash table
 * keyed by its GUID string.
 *
 * NOTE(review): per the file header this file is generated by gen_perf.py;
 * hand-written comments here will presumably be lost on regeneration.
 */
static void
dg1_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   /* perf is passed as the first argument of rzalloc(); the query in turn is
    * passed as the first argument when allocating its counters array. */
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_5";
   query->symbol_name = "L3_5";
   query->guid = "f2de5846-f330-4d26-ad41-824deb6b53f6";
   /* 15 slots: 13 counters registered unconditionally below + 2 gated on
    * slice_mask bit 0. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is chained from the previous one; the resulting values are
    * noted on each line. The +36/+8/+8 steps presumably correspond to the
    * 32+4 A, 8 B and 8 C counters named in the OA format above — confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;  /* = 1 */
   query->a_offset = query->gpu_clock_offset + 1;         /* = 2 */
   query->b_offset = query->a_offset + 36;                /* = 38 */
   query->c_offset = query->b_offset + 8;                 /* = 46 */
   query->perfcnt_offset = query->c_offset + 8;           /* = 54 */
   query->rpstat_offset = query->perfcnt_offset + 2;      /* = 56 */

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */
   /* NOTE(review): the note above looks stale — in this function the query
    * is allocated a few lines up rather than stored in a global. If rzalloc()
    * zero-fills the allocation (as its name suggests — confirm), data_size
    * is 0 here and the branch below is always taken. */

   if (!query->data_size) {
      /* Mux programming table. Many entries target the same register
       * (0x9888) with different values, so the order of entries is
       * presumably significant — do not sort or de-duplicate. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021200 },
         { .reg = 0x00009888, .val = 0x06021000 },
         { .reg = 0x00009888, .val = 0x04001200 },
         { .reg = 0x00009888, .val = 0x06001020 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2205F000 },
         { .reg = 0x00009888, .val = 0x24060014 },
         { .reg = 0x00009888, .val = 0x1A098000 },
         { .reg = 0x00009888, .val = 0x1C090001 },
         { .reg = 0x00009888, .val = 0x22075000 },
         { .reg = 0x00009888, .val = 0x180A0140 },
         { .reg = 0x00009888, .val = 0x1E0D1400 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110AA0 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49101000 },
         { .reg = 0x00009888, .val = 0x4B100919 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register table; stored by pointer, so it must stay
       * static. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register table; likewise referenced by pointer from
       * query->config. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors follow. Each one takes the next free slot in
       * query->counters and records a read callback, descriptive strings,
       * type/unit metadata and an offset. The offsets below step by 8 after
       * each UINT64 counter; FLOAT counters are packed 4 apart except
       * "GPU Busy" (offset 24), which is followed by a UINT64 at 32.
       * Presumably offset is the byte position of the value in the query's
       * result data (see the data_size computation at the end) — confirm. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike the other UINT64 counters here, raw_max is obtained from a
       * helper that takes perf rather than being left at 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-shader-stage thread dispatch counters (VS, HS, DS, GS, FS, CS). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* The display name says "FS" while the symbol name and read callback
       * use "Ps"; both spellings are runtime strings/identifiers and are
       * left as generated. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EU array percentage counters (FLOAT, raw_max 100.0). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The two set-specific L3 counters are only registered when bit 0 of
       * slice_mask is set. Both blocks test the same condition, so either
       * both counters are present or neither is. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_5__l30_bank2_output_ready__read;
         counter->name = "Slice0 L3 Bank2 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank2 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank2OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_5__l30_bank6_output_ready__read;
         counter->name = "Slice0 L3 Bank6 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank6 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank6OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      /* counter points at whichever descriptor was registered last (offset
       * 96 with slice 0 present, otherwise 88), so data_size is the end of
       * that last counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Build the DG1 "L3_6" OA metric set and add it to perf->oa_metrics_table.
 *
 * The function allocates a query descriptor, fills in the accumulation
 * buffer layout, attaches the three static register tables (mux, B-counter,
 * flex) to query->config, registers 13 unconditional counters plus up to two
 * "Slice0 L3 BankN Output Ready" counters (only when bit 0 of
 * perf->sys_vars.slice_mask is set), derives query->data_size from the last
 * registered counter, and finally inserts the query into the hash table
 * keyed by its GUID string.
 *
 * NOTE(review): per the file header this file is generated by gen_perf.py;
 * hand-written comments here will presumably be lost on regeneration.
 */
static void
dg1_register_l3_6_counter_query(struct intel_perf_config *perf)
{
   /* perf is passed as the first argument of rzalloc(); the query in turn is
    * passed as the first argument when allocating its counters array. */
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_6";
   query->symbol_name = "L3_6";
   query->guid = "8f4ccb12-ee11-4741-93d4-d431b8ca58ef";
   /* 15 slots: 13 counters registered unconditionally below + 2 gated on
    * slice_mask bit 0. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is chained from the previous one; the resulting values are
    * noted on each line. The +36/+8/+8 steps presumably correspond to the
    * 32+4 A, 8 B and 8 C counters named in the OA format above — confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;  /* = 1 */
   query->a_offset = query->gpu_clock_offset + 1;         /* = 2 */
   query->b_offset = query->a_offset + 36;                /* = 38 */
   query->c_offset = query->b_offset + 8;                 /* = 46 */
   query->perfcnt_offset = query->c_offset + 8;           /* = 54 */
   query->rpstat_offset = query->perfcnt_offset + 2;      /* = 56 */

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */
   /* NOTE(review): the note above looks stale — in this function the query
    * is allocated a few lines up rather than stored in a global. If rzalloc()
    * zero-fills the allocation (as its name suggests — confirm), data_size
    * is 0 here and the branch below is always taken. */

   if (!query->data_size) {
      /* Mux programming table. Many entries target the same register
       * (0x9888) with different values, so the order of entries is
       * presumably significant — do not sort or de-duplicate. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021A00 },
         { .reg = 0x00009888, .val = 0x06021800 },
         { .reg = 0x00009888, .val = 0x04001A00 },
         { .reg = 0x00009888, .val = 0x06001820 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x3A000000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2205F000 },
         { .reg = 0x00009888, .val = 0x24060014 },
         { .reg = 0x00009888, .val = 0x1A098000 },
         { .reg = 0x00009888, .val = 0x1C090001 },
         { .reg = 0x00009888, .val = 0x22075000 },
         { .reg = 0x00009888, .val = 0x180A0140 },
         { .reg = 0x00009888, .val = 0x1E0D1400 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0E4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x5F110AA0 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49101000 },
         { .reg = 0x00009888, .val = 0x4B100919 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register table; stored by pointer, so it must stay
       * static. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register table; likewise referenced by pointer from
       * query->config. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors follow. Each one takes the next free slot in
       * query->counters and records a read callback, descriptive strings,
       * type/unit metadata and an offset. The offsets below step by 8 after
       * each UINT64 counter; FLOAT counters are packed 4 apart except
       * "GPU Busy" (offset 24), which is followed by a UINT64 at 32.
       * Presumably offset is the byte position of the value in the query's
       * result data (see the data_size computation at the end) — confirm. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Unlike the other UINT64 counters here, raw_max is obtained from a
       * helper that takes perf rather than being left at 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__l3_6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Per-shader-stage thread dispatch counters (VS, HS, DS, GS, FS, CS). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* The display name says "FS" while the symbol name and read callback
       * use "Ps"; both spellings are runtime strings/identifiers and are
       * left as generated. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__l3_6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* EU array percentage counters (FLOAT, raw_max 100.0). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_6__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_6__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__l3_6__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The two set-specific L3 counters are only registered when bit 0 of
       * slice_mask is set. Both blocks test the same condition, so either
       * both counters are present or neither is. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_6__l30_bank3_output_ready__read;
         counter->name = "Slice0 L3 Bank3 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank3 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank3OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__l3_6__l30_bank7_output_ready__read;
         counter->name = "Slice0 L3 Bank7 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank7 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank7OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      /* counter points at whichever descriptor was registered last (offset
       * 96 with slice 0 present, otherwise 88), so data_size is the end of
       * that last counter. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "d336f948-7e5c-41a9-be57-e3b01b8c829a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1C121600 },
         { .reg = 0x00009888, .val = 0x18141600 },
         { .reg = 0x00009888, .val = 0x1C325600 },
         { .reg = 0x00009888, .val = 0x18341600 },
         { .reg = 0x00009888, .val = 0x1C521600 },
         { .reg = 0x00009888, .val = 0x185416A6 },
         { .reg = 0x00009888, .val = 0x1C721600 },
         { .reg = 0x00009888, .val = 0x18741600 },
         { .reg = 0x00009888, .val = 0x1C921600 },
         { .reg = 0x00009888, .val = 0x18941600 },
         { .reg = 0x00009888, .val = 0x1CB21600 },
         { .reg = 0x00009888, .val = 0x18B41600 },
         { .reg = 0x00009888, .val = 0x04120086 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x021400A6 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320086 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C3400A6 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520086 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720086 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x147400A6 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920086 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x109400A6 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20086 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B400A6 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x2405BFFF },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x22050050 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x1A081000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x2406A540 },
         { .reg = 0x00009888, .val = 0x26060005 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C0900FE },
         { .reg = 0x00009888, .val = 0x1C078000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x160A1000 },
         { .reg = 0x00009888, .val = 0x180AA400 },
         { .reg = 0x00009888, .val = 0x1E0D4001 },
         { .reg = 0x00009888, .val = 0x200D0005 },
         { .reg = 0x00009888, .val = 0x040E4000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x0C0EC000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B4000 },
         { .reg = 0x00009888, .val = 0x180B8000 },
         { .reg = 0x00009888, .val = 0x1A0B8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11A00A },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100204 },
         { .reg = 0x00009888, .val = 0x51101A06 },
         { .reg = 0x00009888, .val = 0x53101803 },
         { .reg = 0x00009888, .val = 0x55101311 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100007 },
         { .reg = 0x00009888, .val = 0x49100511 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 DualSubslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 DualSubslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler02_input_available__read;
         counter->name = "Slice0 DualSubslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler03_input_available__read;
         counter->name = "Slice0 DualSubslice3 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler03InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler04_input_available__read;
         counter->name = "Slice0 DualSubslice4 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler04InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_1__sampler05_input_available__read;
         counter->name = "Slice0 DualSubslice5 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler05InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_2";
   query->symbol_name = "Sampler_2";
   query->guid = "c2929b2d-4ce9-4161-a1ac-fff0a75436b9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0C123E00 },
         { .reg = 0x00009888, .val = 0x04143E00 },
         { .reg = 0x00009888, .val = 0x0C323E00 },
         { .reg = 0x00009888, .val = 0x04343E00 },
         { .reg = 0x00009888, .val = 0x0C523E00 },
         { .reg = 0x00009888, .val = 0x04543E00 },
         { .reg = 0x00009888, .val = 0x0C723E00 },
         { .reg = 0x00009888, .val = 0x04743E00 },
         { .reg = 0x00009888, .val = 0x0C923E00 },
         { .reg = 0x00009888, .val = 0x04943E00 },
         { .reg = 0x00009888, .val = 0x0CB23E00 },
         { .reg = 0x00009888, .val = 0x04B43E00 },
         { .reg = 0x00009888, .val = 0x04120033 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x02140013 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320033 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1C324000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C340013 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520033 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x18540013 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720033 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x14740013 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920033 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x10940013 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20033 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B40013 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x2405BFFF },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x22050050 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x1A081000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x2406A540 },
         { .reg = 0x00009888, .val = 0x26060005 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C0900FE },
         { .reg = 0x00009888, .val = 0x1C078000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x160A1000 },
         { .reg = 0x00009888, .val = 0x180AA400 },
         { .reg = 0x00009888, .val = 0x1E0D4001 },
         { .reg = 0x00009888, .val = 0x200D0005 },
         { .reg = 0x00009888, .val = 0x040E4000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x0C0EC000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B4000 },
         { .reg = 0x00009888, .val = 0x180B8000 },
         { .reg = 0x00009888, .val = 0x1A0B8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14E000 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11A00A },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100204 },
         { .reg = 0x00009888, .val = 0x51101A06 },
         { .reg = 0x00009888, .val = 0x53101803 },
         { .reg = 0x00009888, .val = 0x55101311 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100007 },
         { .reg = 0x00009888, .val = 0x49100511 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__sampler_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 DualSubslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 DualSubslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 DualSubslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler03_output_ready__read;
         counter->name = "Slice0 DualSubslice3 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler03OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler04_output_ready__read;
         counter->name = "Slice0 DualSubslice4 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler04OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__sampler_2__sampler05_output_ready__read;
         counter->name = "Slice0 DualSubslice5 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler05OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "a98b240c-71b7-4e72-b167-8538a581e26d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2611001C },
         { .reg = 0x00009888, .val = 0x2631001C },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x16110103 },
         { .reg = 0x00009888, .val = 0x1C110104 },
         { .reg = 0x00009888, .val = 0x1E110105 },
         { .reg = 0x00009888, .val = 0x02110106 },
         { .reg = 0x00009888, .val = 0x04110107 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x0A110000 },
         { .reg = 0x00009888, .val = 0x0E110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x16128000 },
         { .reg = 0x00009888, .val = 0x1C128000 },
         { .reg = 0x00009888, .val = 0x1E128000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x1C132000 },
         { .reg = 0x00009888, .val = 0x1E130003 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x14310103 },
         { .reg = 0x00009888, .val = 0x06310104 },
         { .reg = 0x00009888, .val = 0x08310105 },
         { .reg = 0x00009888, .val = 0x0A310106 },
         { .reg = 0x00009888, .val = 0x0C310107 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x02310000 },
         { .reg = 0x00009888, .val = 0x04310000 },
         { .reg = 0x00009888, .val = 0x14328000 },
         { .reg = 0x00009888, .val = 0x06328000 },
         { .reg = 0x00009888, .val = 0x08328000 },
         { .reg = 0x00009888, .val = 0x0A328000 },
         { .reg = 0x00009888, .val = 0x0C328000 },
         { .reg = 0x00009888, .val = 0x1C331100 },
         { .reg = 0x00009888, .val = 0x16338000 },
         { .reg = 0x00009888, .val = 0x18338000 },
         { .reg = 0x00009888, .val = 0x1A338000 },
         { .reg = 0x00009888, .val = 0x12510103 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x08510000 },
         { .reg = 0x00009888, .val = 0x12528000 },
         { .reg = 0x00009888, .val = 0x1C530800 },
         { .reg = 0x00009888, .val = 0x10710103 },
         { .reg = 0x00009888, .val = 0x08710000 },
         { .reg = 0x00009888, .val = 0x10728000 },
         { .reg = 0x00009888, .val = 0x1C730400 },
         { .reg = 0x00009888, .val = 0x0E910103 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x06910000 },
         { .reg = 0x00009888, .val = 0x0E928000 },
         { .reg = 0x00009888, .val = 0x1C930200 },
         { .reg = 0x00009888, .val = 0x00B10103 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34004140 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A010500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x240541BF },
         { .reg = 0x00009888, .val = 0x26050001 },
         { .reg = 0x00009888, .val = 0x2205AA50 },
         { .reg = 0x00009888, .val = 0x1C080320 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x1A081000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x24060640 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C09000E },
         { .reg = 0x00009888, .val = 0x1C078000 },
         { .reg = 0x00009888, .val = 0x160A1000 },
         { .reg = 0x00009888, .val = 0x180A0800 },
         { .reg = 0x00009888, .val = 0x1E0D4001 },
         { .reg = 0x00009888, .val = 0x040E4000 },
         { .reg = 0x00009888, .val = 0x0A0E8000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x120B8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101415 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D140033 },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x6111282A },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100313 },
         { .reg = 0x00009888, .val = 0x4F100A02 },
         { .reg = 0x00009888, .val = 0x51101318 },
         { .reg = 0x00009888, .val = 0x53100001 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101110 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x49101111 },
         { .reg = 0x00009888, .val = 0x4B101313 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00000002 },
         { .reg = 0x0000D964, .val = 0x0000FFEF },
         { .reg = 0x0000DC20, .val = 0x00000002 },
         { .reg = 0x0000DC24, .val = 0x0000FFEF },
         { .reg = 0x0000D968, .val = 0x00000002 },
         { .reg = 0x0000D96C, .val = 0x0000FFDF },
         { .reg = 0x0000DC28, .val = 0x00000002 },
         { .reg = 0x0000DC2C, .val = 0x0000FFDF },
         { .reg = 0x0000D970, .val = 0x00007800 },
         { .reg = 0x0000D974, .val = 0x0000F0FF },
         { .reg = 0x0000DC30, .val = 0x00007800 },
         { .reg = 0x0000DC34, .val = 0x0000F0FF },
         { .reg = 0x0000D978, .val = 0x00078000 },
         { .reg = 0x0000D97C, .val = 0x00000FFF },
         { .reg = 0x0000DC38, .val = 0x00078000 },
         { .reg = 0x0000DC3C, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread03_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread04_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__non_ps_thread05_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header00_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header00_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header01_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header01_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header00_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader00Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_1__thread_header01_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader01Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 152;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "5e7aa748-e1a6-4282-b3ae-48dbfb06dcf8";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 24);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

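   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only once.
    * NOTE(review): "query" is allocated with rzalloc() above rather than
    * held in a global variable, so the data_size guard below looks like a
    * leftover from an earlier global-variable design -- confirm in
    * gen_perf.py before relying on it. */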

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x24110340 },
         { .reg = 0x00009888, .val = 0x24310340 },
         { .reg = 0x00009888, .val = 0x24510340 },
         { .reg = 0x00009888, .val = 0x24710340 },
         { .reg = 0x00009888, .val = 0x24910340 },
         { .reg = 0x00009888, .val = 0x24B10340 },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x021100F3 },
         { .reg = 0x00009888, .val = 0x041100F2 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x1C3100F3 },
         { .reg = 0x00009888, .val = 0x1E3100F2 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x0E310000 },
         { .reg = 0x00009888, .val = 0x1C328000 },
         { .reg = 0x00009888, .val = 0x1E328000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x185100F3 },
         { .reg = 0x00009888, .val = 0x1A5100F2 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x0C510000 },
         { .reg = 0x00009888, .val = 0x18528000 },
         { .reg = 0x00009888, .val = 0x1A528000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x147100F3 },
         { .reg = 0x00009888, .val = 0x167100F2 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0A710000 },
         { .reg = 0x00009888, .val = 0x14728000 },
         { .reg = 0x00009888, .val = 0x16728000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x109100F3 },
         { .reg = 0x00009888, .val = 0x129100F2 },
         { .reg = 0x00009888, .val = 0x08910000 },
         { .reg = 0x00009888, .val = 0x10928000 },
         { .reg = 0x00009888, .val = 0x12928000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x00B100F3 },
         { .reg = 0x00009888, .val = 0x0EB100F2 },
         { .reg = 0x00009888, .val = 0x06B10104 },
         { .reg = 0x00009888, .val = 0x08B10105 },
         { .reg = 0x00009888, .val = 0x0AB10106 },
         { .reg = 0x00009888, .val = 0x0CB10107 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x02B10000 },
         { .reg = 0x00009888, .val = 0x04B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x0EB28000 },
         { .reg = 0x00009888, .val = 0x06B28000 },
         { .reg = 0x00009888, .val = 0x08B28000 },
         { .reg = 0x00009888, .val = 0x0AB28000 },
         { .reg = 0x00009888, .val = 0x0CB28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30300 },
         { .reg = 0x00009888, .val = 0x16B38000 },
         { .reg = 0x00009888, .val = 0x18B38000 },
         { .reg = 0x00009888, .val = 0x1AB38000 },
         { .reg = 0x00009888, .val = 0x00004000 },
         { .reg = 0x00009888, .val = 0x0E004000 },
         { .reg = 0x00009888, .val = 0x10004000 },
         { .reg = 0x00009888, .val = 0x12004000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x1C05C000 },
         { .reg = 0x00009888, .val = 0x2405BFFF },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x2205FF50 },
         { .reg = 0x00009888, .val = 0x14088000 },
         { .reg = 0x00009888, .val = 0x1A081000 },
         { .reg = 0x00009888, .val = 0x1C064000 },
         { .reg = 0x00009888, .val = 0x2406A555 },
         { .reg = 0x00009888, .val = 0x26060005 },
         { .reg = 0x00009888, .val = 0x22064000 },
         { .reg = 0x00009888, .val = 0x10098000 },
         { .reg = 0x00009888, .val = 0x1C0900FF },
         { .reg = 0x00009888, .val = 0x1A09E000 },
         { .reg = 0x00009888, .val = 0x1C078000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x2207AA00 },
         { .reg = 0x00009888, .val = 0x160A1000 },
         { .reg = 0x00009888, .val = 0x180AA554 },
         { .reg = 0x00009888, .val = 0x1E0D5541 },
         { .reg = 0x00009888, .val = 0x200D0005 },
         { .reg = 0x00009888, .val = 0x040E4000 },
         { .reg = 0x00009888, .val = 0x0A0EC000 },
         { .reg = 0x00009888, .val = 0x0C0EC000 },
         { .reg = 0x00009888, .val = 0x060E8000 },
         { .reg = 0x00009888, .val = 0x080EC000 },
         { .reg = 0x00009888, .val = 0x000B4000 },
         { .reg = 0x00009888, .val = 0x0E0B4000 },
         { .reg = 0x00009888, .val = 0x100B4000 },
         { .reg = 0x00009888, .val = 0x120B4000 },
         { .reg = 0x00009888, .val = 0x180B8000 },
         { .reg = 0x00009888, .val = 0x1A0B8000 },
         { .reg = 0x00009888, .val = 0x060B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009888, .val = 0x0C0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x11148000 },
         { .reg = 0x00009888, .val = 0x1B14FC00 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x4B114000 },
         { .reg = 0x00009888, .val = 0x5F11AAAA },
         { .reg = 0x00009888, .val = 0x61112AAA },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100200 },
         { .reg = 0x00009888, .val = 0x51100A02 },
         { .reg = 0x00009888, .val = 0x5310080A },
         { .reg = 0x00009888, .val = 0x55101318 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100003 },
         { .reg = 0x00009888, .val = 0x49100111 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x70800000 },
         { .reg = 0x0000DC40, .val = 0x007F0000 },
         { .reg = 0x0000D940, .val = 0x00000000 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000000 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000000 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000000 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000000 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000000 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000000 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000000 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00000000 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00000000 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00000000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00000000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
         { .reg = 0x0000D970, .val = 0x00078000 },
         { .reg = 0x0000D974, .val = 0x00000FFF },
         { .reg = 0x0000DC30, .val = 0x00078000 },
         { .reg = 0x0000DC34, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread03_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread04_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__ps_thread05_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__thread_header05_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader05Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__thread_header05_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__thread_header05_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__thread_header05_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_2__thread_header05_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_tdl_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_3";
   query->symbol_name = "TDL_3";
   query->guid = "e12f8fc0-2eb9-4bf6-bf44-a13ceb42c9f7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 28);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

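   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so it's ok to describe a query only once.
    * The query is freshly allocated with rzalloc() above rather than held
    * in a global variable, so data_size is expected to still be zero when
    * the check below runs... */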

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x06510107 },
         { .reg = 0x00009888, .val = 0x08510106 },
         { .reg = 0x00009888, .val = 0x0A510105 },
         { .reg = 0x00009888, .val = 0x0C510104 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x02510000 },
         { .reg = 0x00009888, .val = 0x04510000 },
         { .reg = 0x00009888, .val = 0x06528000 },
         { .reg = 0x00009888, .val = 0x08528000 },
         { .reg = 0x00009888, .val = 0x0A528000 },
         { .reg = 0x00009888, .val = 0x0C528000 },
         { .reg = 0x00009888, .val = 0x16538000 },
         { .reg = 0x00009888, .val = 0x18538000 },
         { .reg = 0x00009888, .val = 0x1A538000 },
         { .reg = 0x00009888, .val = 0x1C530100 },
         { .reg = 0x00009888, .val = 0x1C710107 },
         { .reg = 0x00009888, .val = 0x1E710106 },
         { .reg = 0x00009888, .val = 0x02710105 },
         { .reg = 0x00009888, .val = 0x04710104 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0E710000 },
         { .reg = 0x00009888, .val = 0x00710000 },
         { .reg = 0x00009888, .val = 0x1C728000 },
         { .reg = 0x00009888, .val = 0x1E728000 },
         { .reg = 0x00009888, .val = 0x02728000 },
         { .reg = 0x00009888, .val = 0x04728000 },
         { .reg = 0x00009888, .val = 0x1E730003 },
         { .reg = 0x00009888, .val = 0x12738000 },
         { .reg = 0x00009888, .val = 0x14738000 },
         { .reg = 0x00009888, .val = 0x14910107 },
         { .reg = 0x00009888, .val = 0x16910106 },
         { .reg = 0x00009888, .val = 0x18910105 },
         { .reg = 0x00009888, .val = 0x1A910104 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x0A910000 },
         { .reg = 0x00009888, .val = 0x0C910000 },
         { .reg = 0x00009888, .val = 0x14928000 },
         { .reg = 0x00009888, .val = 0x16928000 },
         { .reg = 0x00009888, .val = 0x18928000 },
         { .reg = 0x00009888, .val = 0x1A928000 },
         { .reg = 0x00009888, .val = 0x1C93F000 },
         { .reg = 0x00009888, .val = 0x34005540 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x02004000 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x06004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x0C004000 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x02014000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2405FFC0 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x2205FFF0 },
         { .reg = 0x00009888, .val = 0x24065015 },
         { .reg = 0x00009888, .val = 0x260600A5 },
         { .reg = 0x00009888, .val = 0x22066800 },
         { .reg = 0x00009888, .val = 0x1C0903F1 },
         { .reg = 0x00009888, .val = 0x14098000 },
         { .reg = 0x00009888, .val = 0x1A09F000 },
         { .reg = 0x00009888, .val = 0x1A0A00AA },
         { .reg = 0x00009888, .val = 0x200D0550 },
         { .reg = 0x00009888, .val = 0x0E0EC000 },
         { .reg = 0x00009888, .val = 0x100EC000 },
         { .reg = 0x00009888, .val = 0x140B4000 },
         { .reg = 0x00009888, .val = 0x160B4000 },
         { .reg = 0x00009888, .val = 0x180B4000 },
         { .reg = 0x00009888, .val = 0x1A0B4000 },
         { .reg = 0x00009888, .val = 0x060B8000 },
         { .reg = 0x00009888, .val = 0x080B8000 },
         { .reg = 0x00009888, .val = 0x0A0B8000 },
         { .reg = 0x00009888, .val = 0x0C0B8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5D101554 },
         { .reg = 0x00009888, .val = 0x5B100555 },
         { .reg = 0x00009888, .val = 0x1D14003F },
         { .reg = 0x00009888, .val = 0x13148000 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x17148000 },
         { .reg = 0x00009888, .val = 0x1B141C00 },
         { .reg = 0x00009888, .val = 0x61112AA8 },
         { .reg = 0x00009888, .val = 0x5F110AAA },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x53100000 },
         { .reg = 0x00009888, .val = 0x55100800 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100008 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100808 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x70800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00070000 },
         { .reg = 0x0000D940, .val = 0x00078000 },
         { .reg = 0x0000D944, .val = 0x00000FFF },
         { .reg = 0x0000DC00, .val = 0x00078000 },
         { .reg = 0x0000DC04, .val = 0x00000FFF },
         { .reg = 0x0000D948, .val = 0x00007800 },
         { .reg = 0x0000D94C, .val = 0x0000F0FF },
         { .reg = 0x0000DC08, .val = 0x00007800 },
         { .reg = 0x0000DC0C, .val = 0x0000F0FF },
         { .reg = 0x0000D950, .val = 0x00000780 },
         { .reg = 0x0000D954, .val = 0x0000FF0F },
         { .reg = 0x0000DC10, .val = 0x00000780 },
         { .reg = 0x0000DC14, .val = 0x0000FF0F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__tdl_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__tdl_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__tdl_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header02_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader02Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header03_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader03Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header04_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader04Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header02_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header02_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header03_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header03_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header03_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header03_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header04_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header04_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header04_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = dg1__tdl_3__thread_header04_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "GpuBusyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "81be185c-7ac7-4a17-85fc-4144ccba4a46";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x22100009 },
         { .reg = 0x00009888, .val = 0x04004000 },
         { .reg = 0x00009888, .val = 0x08004000 },
         { .reg = 0x00009888, .val = 0x0A004000 },
         { .reg = 0x00009888, .val = 0x04014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x22053CC0 },
         { .reg = 0x00009888, .val = 0x22061000 },
         { .reg = 0x00009888, .val = 0x24060005 },
         { .reg = 0x00009888, .val = 0x1A09D000 },
         { .reg = 0x00009888, .val = 0x020C4000 },
         { .reg = 0x00009888, .val = 0x040CC000 },
         { .reg = 0x00009888, .val = 0x1E0D0A20 },
         { .reg = 0x00009888, .val = 0x040F0043 },
         { .reg = 0x00009888, .val = 0x0A0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x080F8000 },
         { .reg = 0x00009888, .val = 0x081000A3 },
         { .reg = 0x00009888, .val = 0x00100000 },
         { .reg = 0x00009888, .val = 0x040B4000 },
         { .reg = 0x00009888, .val = 0x080B4000 },
         { .reg = 0x00009888, .val = 0x0A0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x15102400 },
         { .reg = 0x00009888, .val = 0x230B0120 },
         { .reg = 0x00009888, .val = 0x15182400 },
         { .reg = 0x00009888, .val = 0x232B0120 },
         { .reg = 0x00009888, .val = 0x17100023 },
         { .reg = 0x00009888, .val = 0x11100000 },
         { .reg = 0x00009888, .val = 0x5D101000 },
         { .reg = 0x00009888, .val = 0x5B100545 },
         { .reg = 0x00009888, .val = 0x15148000 },
         { .reg = 0x00009888, .val = 0x1B140C00 },
         { .reg = 0x00009888, .val = 0x61113000 },
         { .reg = 0x00009888, .val = 0x5F110E89 },
         { .reg = 0x00009888, .val = 0x05128000 },
         { .reg = 0x00009888, .val = 0x13138000 },
         { .reg = 0x00009888, .val = 0x1B0D0040 },
         { .reg = 0x00009888, .val = 0x1F0B00D3 },
         { .reg = 0x00009888, .val = 0x210B0000 },
         { .reg = 0x00009888, .val = 0x1B170002 },
         { .reg = 0x00009888, .val = 0x0D174000 },
         { .reg = 0x00009888, .val = 0x071800A3 },
         { .reg = 0x00009888, .val = 0x11180000 },
         { .reg = 0x00009888, .val = 0x032D4000 },
         { .reg = 0x00009888, .val = 0x032B00D3 },
         { .reg = 0x00009888, .val = 0x212B0000 },
         { .reg = 0x00009888, .val = 0x55101000 },
         { .reg = 0x00009888, .val = 0x57100007 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49103203 },
         { .reg = 0x00009888, .val = 0x4B100005 },
         { .reg = 0x00009888, .val = 0x4D100912 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00024002 },
         { .reg = 0x0000D944, .val = 0x0000B7FF },
         { .reg = 0x0000DC00, .val = 0x00024002 },
         { .reg = 0x0000DC04, .val = 0x0000B7FF },
         { .reg = 0x0000D948, .val = 0x0007F000 },
         { .reg = 0x0000D94C, .val = 0x000001FF },
         { .reg = 0x0000DC08, .val = 0x0007F000 },
         { .reg = 0x0000DC0C, .val = 0x000001FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__gpu_busyness__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__gpu_busyness__any_engine_busy__read;
      counter->name = "Any Engine Busy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the DG1 "EuActivity1" OA metric set with @perf.
 *
 * Allocates a query description owned by @perf, attaches the mux,
 * B-counter and flex-EU register programming tables, describes the 18
 * counters exposed by this set, and finally inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * If either allocation fails the function returns without registering
 * anything, so the metric set is simply not advertised.
 */
static void
dg1_register_eu_activity1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Do not dereference a failed allocation. */
   if (!query)
      return;

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity1";
   query->symbol_name = "EuActivity1";
   query->guid = "5354b8d9-12fd-44eb-8f7d-0a4dffae4409";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);

   /* Without the counter array nothing below can be filled in.  The query
    * was allocated with perf as its ralloc context above, so it is
    * presumably released together with perf -- TODO confirm against
    * util/ralloc.h. */
   if (!query->counters)
      return;

   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: slot 0 is GPU time, slot 1 the GPU clock,
    * followed by 36 A slots, 8 B slots, 8 C slots and 2 perfcnt slots. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the register tables below are static
    * const data shared by every registration.
    *
    * The query itself was allocated just above; assuming rzalloc returns
    * zeroed memory (TODO confirm), data_size is still 0 here and this
    * branch is always taken. */

   if (!query->data_size) {
      /* Opaque register/value pairs emitted by the generator.  The same
       * register (0x9888) is written repeatedly, so the order presumably
       * matters -- keep it as is. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00810710 },
         { .reg = 0x0000E558, .val = 0x00A10910 },
         { .reg = 0x0000E658, .val = 0x00850750 },
         { .reg = 0x0000E758, .val = 0x00A50950 },
         { .reg = 0x0000E45C, .val = 0x00802702 },
         { .reg = 0x0000E55C, .val = 0x00A02902 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* Counter descriptors.  Each one claims the next array slot and
       * records its byte offset in the result data: offsets advance by 8
       * after a UINT64 counter and by 4 after a FLOAT counter, with the
       * 88 -> 96 step presumably padding the next UINT64 to 8 bytes. */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from perf. */
      counter->raw_max = dg1__eu_activity1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__compute_busy__read;
      counter->name = "Compute Ring Busy";
      /* Describes the compute streamer; the text used to be a verbatim
       * copy of the RenderBusy description. */
      counter->desc = "The percentage of time when compute command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity1__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__vs_fpu_active__read;
      counter->name = "VS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__ps_fpu_active__read;
      counter->name = "PS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpuActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity1__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* Total result size: end of the last counter registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query so it can be looked up by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Register the DG1 "EuActivity2" OA metric set with @perf.
 *
 * Allocates a query description owned by @perf, attaches the mux,
 * B-counter and flex-EU register programming tables, describes the 18
 * counters exposed by this set (the last three are compute-shader EU pipe
 * activity counters), and finally inserts the query into
 * perf->oa_metrics_table keyed by its GUID string.
 *
 * If either allocation fails the function returns without registering
 * anything, so the metric set is simply not advertised.
 */
static void
dg1_register_eu_activity2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Do not dereference a failed allocation. */
   if (!query)
      return;

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity2";
   query->symbol_name = "EuActivity2";
   query->guid = "3225b6c6-805a-4e93-aaa5-5f09d87b31f9";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);

   /* Without the counter array nothing below can be filled in.  The query
    * was allocated with perf as its ralloc context above, so it is
    * presumably released together with perf -- TODO confirm against
    * util/ralloc.h. */
   if (!query->counters)
      return;

   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: slot 0 is GPU time, slot 1 the GPU clock,
    * followed by 36 A slots, 8 B slots, 8 C slots and 2 perfcnt slots. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts, so the register tables below are static
    * const data shared by every registration.
    *
    * The query itself was allocated just above; assuming rzalloc returns
    * zeroed memory (TODO confirm), data_size is still 0 here and this
    * branch is always taken. */

   if (!query->data_size) {
      /* Opaque register/value pairs emitted by the generator.  The same
       * register (0x9888) is written repeatedly, so the order presumably
       * matters -- keep it as is. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00862762 },
         { .reg = 0x0000E558, .val = 0x00A62962 },
         { .reg = 0x0000E658, .val = 0x00860760 },
         { .reg = 0x0000E758, .val = 0x00A60960 },
         { .reg = 0x0000E45C, .val = 0x00861761 },
         { .reg = 0x0000E55C, .val = 0x00A61961 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* Counter descriptors.  Each one claims the next array slot and
       * records its byte offset in the result data: offsets advance by 8
       * after a UINT64 counter and by 4 after a FLOAT counter, with the
       * 88 -> 96 step presumably padding the next UINT64 to 8 bytes. */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from perf. */
      counter->raw_max = dg1__eu_activity2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__compute_busy__read;
      counter->name = "Compute Ring Busy";
      /* Describes the compute streamer; the text used to be a verbatim
       * copy of the RenderBusy description. */
      counter->desc = "The percentage of time when compute command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity2__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__cs_em_active__read;
      counter->name = "CS EM Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsEmActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__cs_fpu_active__read;
      counter->name = "CS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity2__cs_send_active__read;
      counter->name = "CS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsSendActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* Total result size: end of the last counter registered above. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query so it can be looked up by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity3";
   query->symbol_name = "EuActivity3";
   query->guid = "c423d483-13da-4bae-9b61-c13ec19ee450";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00811711 },
         { .reg = 0x0000E558, .val = 0x00A11911 },
         { .reg = 0x0000E658, .val = 0x00851751 },
         { .reg = 0x0000E758, .val = 0x00A51951 },
         { .reg = 0x0000E45C, .val = 0x00852752 },
         { .reg = 0x0000E55C, .val = 0x00A52952 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity3__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__vs_em_active__read;
      counter->name = "VS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsEmActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__ps_em_active__read;
      counter->name = "PS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsEmActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity4";
   query->symbol_name = "EuActivity4";
   query->guid = "c7388ce3-9e3f-4804-a215-771791c4ea3c";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00820720 },
         { .reg = 0x0000E558, .val = 0x00A20920 },
         { .reg = 0x0000E658, .val = 0x00830730 },
         { .reg = 0x0000E758, .val = 0x00A30930 },
         { .reg = 0x0000E45C, .val = 0x00812712 },
         { .reg = 0x0000E55C, .val = 0x00A12912 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity4__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__hs_fpu_active__read;
      counter->name = "HS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsFpuActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__ds_fpu_active__read;
      counter->name = "DS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsFpuActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity5";
   query->symbol_name = "EuActivity5";
   query->guid = "7ebc4a0a-5dc3-4de3-a8f8-2a6d1a95427f";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00821721 },
         { .reg = 0x0000E558, .val = 0x00A21921 },
         { .reg = 0x0000E658, .val = 0x00831731 },
         { .reg = 0x0000E758, .val = 0x00A31931 },
         { .reg = 0x0000E45C, .val = 0x00822722 },
         { .reg = 0x0000E55C, .val = 0x00A22922 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity5__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__hs_em_active__read;
      counter->name = "HS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a hull shader instructions. Unit: percent.";
      counter->symbol_name = "HsEmActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__ds_em_active__read;
      counter->name = "DS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsEmActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity5__hs_send_active__read;
      counter->name = "HS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsSendActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity6";
   query->symbol_name = "EuActivity6";
   query->guid = "811a34c7-87f3-4d4d-b366-d4b5f5dc5be1";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00840740 },
         { .reg = 0x0000E558, .val = 0x00A40940 },
         { .reg = 0x0000E658, .val = 0x00841741 },
         { .reg = 0x0000E758, .val = 0x00A41941 },
         { .reg = 0x0000E45C, .val = 0x00842742 },
         { .reg = 0x0000E55C, .val = 0x00A42942 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity6__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__gs_fpu_active__read;
      counter->name = "GS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsFpuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__gs_em_active__read;
      counter->name = "GS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsEmActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity6__gs_send_active__read;
      counter->name = "GS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a geometry shader instruction. Unit: percent.";
      counter->symbol_name = "GsSendActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity7_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity7";
   query->symbol_name = "EuActivity7";
   query->guid = "6ec13866-51c1-46c3-818b-c63cf38efcd5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00803703 },
         { .reg = 0x0000E558, .val = 0x00A03903 },
         { .reg = 0x0000E658, .val = 0x00800700 },
         { .reg = 0x0000E758, .val = 0x00A00900 },
         { .reg = 0x0000E45C, .val = 0x00801701 },
         { .reg = 0x0000E55C, .val = 0x00A01901 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity7__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__fpu_active__read;
      counter->name = "EU FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "FpuActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__em_active__read;
      counter->name = "EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__eu_fpu_em_active__read;
      counter->name = "EU FPU And EM Pipes Active";
      counter->desc = "The percentage of time in which EU FPU and EM pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuEmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity7__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity7__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_eu_activity8_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity8";
   query->symbol_name = "EuActivity8";
   query->guid = "4bc9e67a-0ebd-4d65-86cd-2eb961ccac59";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 16);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0F2400 },
         { .reg = 0x00009888, .val = 0x220F0009 },
         { .reg = 0x00009888, .val = 0x34004000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010005 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060050 },
         { .reg = 0x00009888, .val = 0x1C090300 },
         { .reg = 0x00009888, .val = 0x0E0CC000 },
         { .reg = 0x00009888, .val = 0x200DA000 },
         { .reg = 0x00009888, .val = 0x1C0F0043 },
         { .reg = 0x00009888, .val = 0x1E0F00B3 },
         { .reg = 0x00009888, .val = 0x200F0000 },
         { .reg = 0x00009888, .val = 0x1C0B4000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1D140030 },
         { .reg = 0x00009888, .val = 0x61112800 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101A1A },
         { .reg = 0x00009888, .val = 0x4B101A1A },
         { .reg = 0x00009888, .val = 0x4D100A1A },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00832732 },
         { .reg = 0x0000E558, .val = 0x00A32932 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__eu_activity8__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity8__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity8__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity8__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity8__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__eu_activity8__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = dg1__eu_activity8__ds_send_active__read;
      counter->name = "DS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a domain shader instruction. Unit: percent.";
      counter->symbol_name = "DsSendActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
dg1_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TestOa";
   query->symbol_name = "TestOa";
   query->guid = "23f51139-6973-4b45-a211-778834ce2c9a";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 13);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04100000 },
         { .reg = 0x00009888, .val = 0x36000001 },
         { .reg = 0x00009888, .val = 0x2C010004 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x26060040 },
         { .reg = 0x00009888, .val = 0x1C090200 },
         { .reg = 0x00009888, .val = 0x0E0C8000 },
         { .reg = 0x00009888, .val = 0x200D8000 },
         { .reg = 0x00009888, .val = 0x1E0F8000 },
         { .reg = 0x00009888, .val = 0x1E100017 },
         { .reg = 0x00009888, .val = 0x00100000 },
         { .reg = 0x00009888, .val = 0x1E0B4000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x49110000 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x1D140020 },
         { .reg = 0x00009888, .val = 0x1D1103A3 },
         { .reg = 0x00009888, .val = 0x01110000 },
         { .reg = 0x00009888, .val = 0x61112000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100230 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000004 },
         { .reg = 0x0000D944, .val = 0x0000FFFF },
         { .reg = 0x0000DC00, .val = 0x00000004 },
         { .reg = 0x0000DC04, .val = 0x0000FFFF },
         { .reg = 0x0000D948, .val = 0x00000003 },
         { .reg = 0x0000D94C, .val = 0x0000FFFF },
         { .reg = 0x0000DC08, .val = 0x00000003 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFF },
         { .reg = 0x0000D950, .val = 0x00000007 },
         { .reg = 0x0000D954, .val = 0x0000FFFF },
         { .reg = 0x0000DC10, .val = 0x00000007 },
         { .reg = 0x0000DC14, .val = 0x0000FFFF },
         { .reg = 0x0000D958, .val = 0x00100002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00100002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00100002 },
         { .reg = 0x0000D964, .val = 0x0000FFCF },
         { .reg = 0x0000DC20, .val = 0x00100002 },
         { .reg = 0x0000DC24, .val = 0x0000FFCF },
         { .reg = 0x0000D968, .val = 0x00100082 },
         { .reg = 0x0000D96C, .val = 0x0000FFEF },
         { .reg = 0x0000DC28, .val = 0x00100082 },
         { .reg = 0x0000DC2C, .val = 0x0000FFEF },
         { .reg = 0x0000D970, .val = 0x001000C2 },
         { .reg = 0x0000D974, .val = 0x0000FFE7 },
         { .reg = 0x0000DC30, .val = 0x001000C2 },
         { .reg = 0x0000DC34, .val = 0x0000FFE7 },
         { .reg = 0x0000D978, .val = 0x00100001 },
         { .reg = 0x0000D97C, .val = 0x0000FFE7 },
         { .reg = 0x0000DC38, .val = 0x00100001 },
         { .reg = 0x0000DC3C, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = dg1__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = dg1__test_oa__counter9__read;
      counter->name = "TestCounter9 - OAR enable";
      counter->desc = "HW test counter 9. Should be equal to 1 in query. Unit: events.";
      counter->symbol_name = "Counter9";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_dg1(struct intel_perf_config *perf)
{
   dg1_register_render_basic_counter_query(perf);
   dg1_register_compute_basic_counter_query(perf);
   dg1_register_render_pipe_profile_counter_query(perf);
   dg1_register_hdc_and_sf_counter_query(perf);
   dg1_register_rasterizer_and_pixel_backend_counter_query(perf);
   dg1_register_l3_1_counter_query(perf);
   dg1_register_l3_2_counter_query(perf);
   dg1_register_l3_3_counter_query(perf);
   dg1_register_l3_4_counter_query(perf);
   dg1_register_l3_5_counter_query(perf);
   dg1_register_l3_6_counter_query(perf);
   dg1_register_sampler_1_counter_query(perf);
   dg1_register_sampler_2_counter_query(perf);
   dg1_register_tdl_1_counter_query(perf);
   dg1_register_tdl_2_counter_query(perf);
   dg1_register_tdl_3_counter_query(perf);
   dg1_register_gpu_busyness_counter_query(perf);
   dg1_register_eu_activity1_counter_query(perf);
   dg1_register_eu_activity2_counter_query(perf);
   dg1_register_eu_activity3_counter_query(perf);
   dg1_register_eu_activity4_counter_query(perf);
   dg1_register_eu_activity5_counter_query(perf);
   dg1_register_eu_activity6_counter_query(perf);
   dg1_register_eu_activity7_counter_query(perf);
   dg1_register_eu_activity8_counter_query(perf);
   dg1_register_test_oa_counter_query(perf);
}


static void
adl_register_render_basic_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics Basic set";
   query->symbol_name = "RenderBasic";
   query->guid = "4b886bf3-61ff-4381-9994-ac9b91202fc7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 34);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14150001 },
         { .reg = 0x00009888, .val = 0x16150020 },
         { .reg = 0x00009888, .val = 0x00124000 },
         { .reg = 0x00009888, .val = 0x0E124000 },
         { .reg = 0x00009888, .val = 0x10124000 },
         { .reg = 0x00009888, .val = 0x12124000 },
         { .reg = 0x00009888, .val = 0x10138000 },
         { .reg = 0x00009888, .val = 0x1C130E00 },
         { .reg = 0x00009888, .val = 0x00150050 },
         { .reg = 0x00009888, .val = 0x06157000 },
         { .reg = 0x00009888, .val = 0x08157151 },
         { .reg = 0x00009888, .val = 0x10150000 },
         { .reg = 0x00009888, .val = 0x18150000 },
         { .reg = 0x00009888, .val = 0x1C150000 },
         { .reg = 0x00009888, .val = 0x18004000 },
         { .reg = 0x00009888, .val = 0x36000490 },
         { .reg = 0x00009888, .val = 0x1C058000 },
         { .reg = 0x00009888, .val = 0x2405002A },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D47 },
         { .reg = 0x00009888, .val = 0x09151536 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B1050BB },
         { .reg = 0x00009888, .val = 0x5D102C01 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D1402A0 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x1F150137 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x0F168000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x1D350137 },
         { .reg = 0x00009888, .val = 0x03350147 },
         { .reg = 0x00009888, .val = 0x07350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x0F364000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100000 },
         { .reg = 0x00009888, .val = 0x51100000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55101210 },
         { .reg = 0x00009888, .val = 0x57100002 },
         { .reg = 0x00009888, .val = 0x49101212 },
         { .reg = 0x00009888, .val = 0x4B100212 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__render_basic__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_basic__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_basic__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 76;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_basic__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_basic__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__render_basic__sampler00_busy__read;
         counter->name = "Sampler00 Busy";
         counter->desc = "The percentage of time in which Slice0 Sampler0 has been processing EU requests. Unit: percent.";
         counter->symbol_name = "Sampler00Busy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 88;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__render_basic__sampler00_bottleneck__read;
         counter->name = "Sampler Slice0 Dualsubslice0 is bottleneck";
         counter->desc = "The percentage of time when sampler slice0 dualsubslice0 is bottleneck Unit: percent.";
         counter->symbol_name = "Sampler00Bottleneck";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__render_basic__samplers_busy__read;
         counter->name = "Samplers Busy";
         counter->desc = "The percentage of time in which samplers have been processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplersBusy";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__render_basic__sampler_bottleneck__read;
         counter->name = "Samplers Bottleneck";
         counter->desc = "The percentage of time in which samplers have been slowing down the pipe when processing EU requests. Unit: percent.";
         counter->symbol_name = "SamplerBottleneck";
         counter->category = "Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_basic__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 232;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Compute Metrics Basic set :: query registration
 *
 * Allocates the query description for the "Compute Metrics Basic" OA metric
 * set, attaches its MUX / B-counter / flex register programming, describes
 * its 30 counters in report order and finally inserts the query into
 * perf->oa_metrics_table, keyed by the set's GUID.
 *
 * The query (and its counter array) are ralloc children of 'perf'.
 */
static void
adl_register_compute_basic_counter_query(struct intel_perf_config *perf)
{
   /* Register programming for this metric set. These are plain constant
    * tables; they are only hooked up to the query further down. */
   static const struct intel_perf_query_register_prog mux_config[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x05151D37 },
      { .reg = 0x00009888, .val = 0x09151547 },
      { .reg = 0x00009888, .val = 0x05351C00 },
      { .reg = 0x00009888, .val = 0x09351400 },
      { .reg = 0x00009888, .val = 0x5B100BBB },
      { .reg = 0x00009888, .val = 0x0D150136 },
      { .reg = 0x00009888, .val = 0x01150000 },
      { .reg = 0x00009888, .val = 0x03164000 },
      { .reg = 0x00009888, .val = 0x05164000 },
      { .reg = 0x00009888, .val = 0x07164000 },
      { .reg = 0x00009888, .val = 0x03350137 },
      { .reg = 0x00009888, .val = 0x07350147 },
      { .reg = 0x00009888, .val = 0x0B350136 },
      { .reg = 0x00009888, .val = 0x01350000 },
      { .reg = 0x00009888, .val = 0x01368000 },
      { .reg = 0x00009888, .val = 0x03368000 },
      { .reg = 0x00009888, .val = 0x05368000 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x49100000 },
      { .reg = 0x00009888, .val = 0x4B100000 },
      { .reg = 0x00009888, .val = 0x4D100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog boolean_config[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog flex_config[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
      { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
   };

   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Compute Metrics Basic";
   query->symbol_name = "ComputeBasic";
   query->guid = "eb6a0ef8-4e8f-41fa-85ba-686835711f6b";

   /* Room for exactly the 30 counters described below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 30);
   query->n_counters = 0;

   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Accumulation buffer layout: every section begins where the previous
    * one ends (1 GPU time slot, 1 GPU clock slot, 36 A, 8 B, 8 C, 2 perfcnt). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The definition of a query is assumed not to vary between contexts, so
    * it only has to be filled in once, while data_size is still unset.
    * NOTE(review): 'query' was allocated just above; if rzalloc zero-fills
    * (as its name suggests) this condition always holds - confirm. */
   if (!query->data_size) {
      query->config.mux_regs = mux_config;
      query->config.n_mux_regs = ARRAY_SIZE(mux_config);

      query->config.b_counter_regs = boolean_config;
      query->config.n_b_counter_regs = ARRAY_SIZE(boolean_config);

      query->config.flex_regs = flex_config;
      query->config.n_flex_regs = ARRAY_SIZE(flex_config);

      /* Claims the next free slot of query->counters (bumping n_counters)
       * and fills in every field of the counter description. 'reader_'
       * selects which read-callback member receives 'read_fn_'. */
#define ADL_COMPUTE_BASIC_ADD_COUNTER(reader_, read_fn_, name_, desc_, symbol_name_, category_, type_, data_type_, units_, raw_max_, offset_) \
      do { \
         counter = &query->counters[query->n_counters++]; \
         counter->reader_ = (read_fn_); \
         counter->name = (name_); \
         counter->desc = (desc_); \
         counter->symbol_name = (symbol_name_); \
         counter->category = (category_); \
         counter->type = (type_); \
         counter->data_type = (data_type_); \
         counter->units = (units_); \
         counter->raw_max = (raw_max_); \
         counter->offset = (offset_); \
      } while (0)

      /* --- GPU-wide counters --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__gpu_time__read,
         "GPU Time Elapsed",
         "Time elapsed on the GPU during the measurement. Unit: ns.",
         "GpuTime",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_NS,
         0 /* undefined */,
         0);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__gpu_core_clocks__read,
         "GPU Core Clocks",
         "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.",
         "GpuCoreClocks",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_CYCLES,
         0 /* undefined */,
         8);

      /* The only counter whose raw_max is computed at registration time. */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__avg_gpu_core_frequency__read,
         "AVG GPU Core Frequency",
         "Average GPU Core Frequency in the measurement. Unit: Hz.",
         "AvgGpuCoreFrequency",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_HZ,
         adl__compute_basic__avg_gpu_core_frequency__max(perf),
         16);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_float, adl__compute_basic__gpu_busy__read,
         "GPU Busy",
         "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.",
         "GpuBusy",
         "GPU",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         24);

      /* --- Thread dispatch counters, one per shader stage --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__vs_threads__read,
         "VS Threads Dispatched",
         "The total number of vertex shader hardware threads dispatched. Unit: threads.",
         "VsThreads",
         "EU Array/Vertex Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         32);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__hs_threads__read,
         "HS Threads Dispatched",
         "The total number of hull shader hardware threads dispatched. Unit: threads.",
         "HsThreads",
         "EU Array/Hull Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         40);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__ds_threads__read,
         "DS Threads Dispatched",
         "The total number of domain shader hardware threads dispatched. Unit: threads.",
         "DsThreads",
         "EU Array/Domain Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         48);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__gs_threads__read,
         "GS Threads Dispatched",
         "The total number of geometry shader hardware threads dispatched. Unit: threads.",
         "GsThreads",
         "EU Array/Geometry Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         56);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__ps_threads__read,
         "FS Threads Dispatched",
         "The total number of fragment shader hardware threads dispatched. Unit: threads.",
         "PsThreads",
         "EU Array/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         64);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__cs_threads__read,
         "CS Threads Dispatched",
         "The total number of compute shader hardware threads dispatched. Unit: threads.",
         "CsThreads",
         "EU Array/Compute Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_THREADS,
         0 /* undefined */,
         72);

      /* --- EU array utilisation (float percentages, 4 bytes apart) --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_float, adl__compute_basic__eu_active__read,
         "EU Active",
         "The percentage of time in which the Execution Units were actively processing. Unit: percent.",
         "EuActive",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         80);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_float, adl__compute_basic__eu_stall__read,
         "EU Stall",
         "The percentage of time in which the Execution Units were stalled. Unit: percent.",
         "EuStall",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         84);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_float, adl__compute_basic__eu_thread_occupancy__read,
         "EU Thread Occupancy",
         "The percentage of time in which hardware threads occupied EUs. Unit: percent.",
         "EuThreadOccupancy",
         "EU Array",
         INTEL_PERF_COUNTER_TYPE_RAW,
         INTEL_PERF_COUNTER_DATA_TYPE_FLOAT,
         INTEL_PERF_COUNTER_UNITS_PERCENT,
         100.0,
         88);

      /* --- 3D pipe pixel/sample counters --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__rasterized_pixels__read,
         "Rasterized Pixels",
         "The total number of rasterized pixels. Unit: pixels.",
         "RasterizedPixels",
         "3D Pipe/Rasterizer",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         96);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__hi_depth_test_fails__read,
         "Early Hi-Depth Test Fails",
         "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.",
         "HiDepthTestFails",
         "3D Pipe/Rasterizer/Hi-Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         104);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__early_depth_test_fails__read,
         "Early Depth Test Fails",
         "The total number of pixels dropped on early depth test. Unit: pixels.",
         "EarlyDepthTestFails",
         "3D Pipe/Rasterizer/Early Depth Test",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         112);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__samples_killed_in_ps__read,
         "Samples Killed in FS",
         "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.",
         "SamplesKilledInPs",
         "3D Pipe/Fragment Shader",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         120);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__pixels_failing_post_ps_tests__read,
         "Pixels Failing Tests",
         "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.",
         "PixelsFailingPostPsTests",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         128);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__samples_written__read,
         "Samples Written",
         "The total number of samples or pixels written to all render targets. Unit: pixels.",
         "SamplesWritten",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         136);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__samples_blended__read,
         "Samples Blended",
         "The total number of blended samples or pixels written to all render targets. Unit: pixels.",
         "SamplesBlended",
         "3D Pipe/Output Merger",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_PIXELS,
         0 /* undefined */,
         144);

      /* --- Sampler counters --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__sampler_texels__read,
         "Sampler Texels",
         "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.",
         "SamplerTexels",
         "Sampler/Sampler Input",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */,
         152);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__sampler_texel_misses__read,
         "Sampler Texels Misses",
         "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.",
         "SamplerTexelMisses",
         "Sampler/Sampler Cache",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_TEXELS,
         0 /* undefined */,
         160);

      /* --- L3 / data port counters --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__slm_bytes_read__read,
         "SLM Bytes Read",
         "The total number of GPU memory bytes read from shared local memory. Unit: bytes.",
         "SlmBytesRead",
         "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         168);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__slm_bytes_written__read,
         "SLM Bytes Written",
         "The total number of GPU memory bytes written into shared local memory. Unit: bytes.",
         "SlmBytesWritten",
         "L3/Data Port/SLM",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         176);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__shader_memory_accesses__read,
         "Shader Memory Accesses",
         "The total number of shader memory accesses to L3. Unit: messages.",
         "ShaderMemoryAccesses",
         "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         184);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__shader_atomics__read,
         "Shader Atomic Memory Accesses",
         "The total number of shader atomic memory accesses. Unit: messages.",
         "ShaderAtomics",
         "L3/Data Port/Atomics",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         192);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__l3_shader_throughput__read,
         "L3 Shader Throughput",
         "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.",
         "L3ShaderThroughput",
         "L3/Data Port",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         200);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__shader_barriers__read,
         "Shader Barrier Messages",
         "The total number of shader barrier messages. Unit: messages.",
         "ShaderBarriers",
         "EU Array/Barrier",
         INTEL_PERF_COUNTER_TYPE_EVENT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_MESSAGES,
         0 /* undefined */,
         208);

      /* --- GTI throughput counters --- */
      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__gti_read_throughput__read,
         "GTI Read Throughput",
         "The total number of GPU memory bytes read from GTI. Unit: bytes.",
         "GtiReadThroughput",
         "GTI",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         216);

      ADL_COMPUTE_BASIC_ADD_COUNTER(
         oa_counter_read_uint64, adl__compute_basic__gti_write_throughput__read,
         "GTI Write Throughput",
         "The total number of GPU memory bytes written to GTI. Unit: bytes.",
         "GtiWriteThroughput",
         "GTI",
         INTEL_PERF_COUNTER_TYPE_THROUGHPUT,
         INTEL_PERF_COUNTER_DATA_TYPE_UINT64,
         INTEL_PERF_COUNTER_UNITS_BYTES,
         0 /* unsupported (varies over time) */,
         224);

#undef ADL_COMPUTE_BASIC_ADD_COUNTER

      /* 'counter' still points at the last counter added above, so the
       * total result size is that counter's offset plus its own size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_render_pipe_profile_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Render Metrics set for 3D Pipeline Profile";
   query->symbol_name = "RenderPipeProfile";
   query->guid = "ee81cfab-667d-46c5-a2b7-17283b28c38d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 43);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x00123E00 },
         { .reg = 0x00009888, .val = 0x060B00B3 },
         { .reg = 0x00009888, .val = 0x140B3C00 },
         { .reg = 0x00009888, .val = 0x1C0B0000 },
         { .reg = 0x00009888, .val = 0x120C8320 },
         { .reg = 0x00009888, .val = 0x040DBE00 },
         { .reg = 0x00009888, .val = 0x000D0000 },
         { .reg = 0x00009888, .val = 0x280D0000 },
         { .reg = 0x00009888, .val = 0x2C0E7C00 },
         { .reg = 0x00009888, .val = 0x10087C00 },
         { .reg = 0x00009888, .val = 0x1E120002 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x1E130002 },
         { .reg = 0x00009888, .val = 0x0E0B0031 },
         { .reg = 0x00009888, .val = 0x180B0092 },
         { .reg = 0x00009888, .val = 0x1A0B00B1 },
         { .reg = 0x00009888, .val = 0x020B0093 },
         { .reg = 0x00009888, .val = 0x040B0033 },
         { .reg = 0x00009888, .val = 0x000B0000 },
         { .reg = 0x00009888, .val = 0x0A0C0022 },
         { .reg = 0x00009888, .val = 0x1E0C0030 },
         { .reg = 0x00009888, .val = 0x1C0C8000 },
         { .reg = 0x00009888, .val = 0x140C8000 },
         { .reg = 0x00009888, .val = 0x160C8000 },
         { .reg = 0x00009888, .val = 0x100DC017 },
         { .reg = 0x00009888, .val = 0x160D0013 },
         { .reg = 0x00009888, .val = 0x1C0D0081 },
         { .reg = 0x00009888, .val = 0x080D0082 },
         { .reg = 0x00009888, .val = 0x0A0D8102 },
         { .reg = 0x00009888, .val = 0x140D0000 },
         { .reg = 0x00009888, .val = 0x0C0D0000 },
         { .reg = 0x00009888, .val = 0x0E0D4000 },
         { .reg = 0x00009888, .val = 0x120D0000 },
         { .reg = 0x00009888, .val = 0x060DC000 },
         { .reg = 0x00009888, .val = 0x0C0EC1C5 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x0E0E8000 },
         { .reg = 0x00009888, .val = 0x100E4000 },
         { .reg = 0x00009888, .val = 0x120EC000 },
         { .reg = 0x00009888, .val = 0x140EC000 },
         { .reg = 0x00009888, .val = 0x160E4000 },
         { .reg = 0x00009888, .val = 0x080E8000 },
         { .reg = 0x00009888, .val = 0x0A0EC000 },
         { .reg = 0x00009888, .val = 0x1C0F5555 },
         { .reg = 0x00009888, .val = 0x1E0F0554 },
         { .reg = 0x00009888, .val = 0x0E104000 },
         { .reg = 0x00009888, .val = 0x10104000 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x18104000 },
         { .reg = 0x00009888, .val = 0x1A104000 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x02104000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x06104000 },
         { .reg = 0x00009888, .val = 0x08104000 },
         { .reg = 0x00009888, .val = 0x0A104000 },
         { .reg = 0x00009888, .val = 0x0C104000 },
         { .reg = 0x00009888, .val = 0x0E024000 },
         { .reg = 0x00009888, .val = 0x10024000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020055 },
         { .reg = 0x00009888, .val = 0x02024000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x06024000 },
         { .reg = 0x00009888, .val = 0x08024000 },
         { .reg = 0x00009888, .val = 0x0A024000 },
         { .reg = 0x00009888, .val = 0x0C024000 },
         { .reg = 0x00009888, .val = 0x1A032000 },
         { .reg = 0x00009888, .val = 0x1C032000 },
         { .reg = 0x00009888, .val = 0x2A035500 },
         { .reg = 0x00009888, .val = 0x2C030001 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x360036DB },
         { .reg = 0x00009888, .val = 0x380026DB },
         { .reg = 0x00009888, .val = 0x1A006000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x0E0A8000 },
         { .reg = 0x00009888, .val = 0x100A8000 },
         { .reg = 0x00009888, .val = 0x120A4000 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009888, .val = 0x08081000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x1D14AAAA },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B14AA00 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x4D100604 },
         { .reg = 0x00009888, .val = 0x4F101400 },
         { .reg = 0x00009888, .val = 0x51100203 },
         { .reg = 0x00009888, .val = 0x53100004 },
         { .reg = 0x00009888, .val = 0x55101400 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100400 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__render_pipe_profile__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__render_pipe_profile__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__vf_bottleneck__read;
      counter->name = "VF Bottleneck";
      counter->desc = "The percentage of time in which vertex fetch pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VfBottleneck";
      counter->category = "3D Pipe/Input Assembler";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 216;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__vs_bottleneck__read;
      counter->name = "VS Bottleneck";
      counter->desc = "The percentage of time in which vertex shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "VsBottleneck";
      counter->category = "3D Pipe/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 220;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__hs_bottleneck__read;
      counter->name = "HS Bottleneck";
      counter->desc = "The percentage of time in which hull shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HsBottleneck";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__ds_bottleneck__read;
      counter->name = "DS Bottleneck";
      counter->desc = "The percentage of time in which domain shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "DsBottleneck";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__gs_bottleneck__read;
      counter->name = "GS Bottleneck";
      counter->desc = "The percentage of time in which geometry shader pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "GsBottleneck";
      counter->category = "3D Pipe/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__so_bottleneck__read;
      counter->name = "SO Bottleneck";
      counter->desc = "The percentage of time in which stream output pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SoBottleneck";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__cl_bottleneck__read;
      counter->name = "Clipper Bottleneck";
      counter->desc = "The percentage of time in which clipper pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "ClBottleneck";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__sf_bottleneck__read;
      counter->name = "Strip-Fans Bottleneck";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "SfBottleneck";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__hi_depth_bottleneck__read;
      counter->name = "Hi-Depth Bottleneck";
      counter->desc = "The percentage of time in which early hierarchical depth test pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "HiDepthBottleneck";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__bc_bottleneck__read;
      counter->name = "BC Bottleneck";
      counter->desc = "The percentage of time in which barycentric coordinates calculation pipeline stage was slowing down the 3D pipeline. Unit: percent.";
      counter->symbol_name = "BcBottleneck";
      counter->category = "3D Pipe/Rasterizer/Barycentric Calc";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__hs_stall__read;
      counter->name = "HS Stall";
      counter->desc = "The percentage of time in which hull stall pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "HsStall";
      counter->category = "3D Pipe/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__ds_stall__read;
      counter->name = "DS Stall";
      counter->desc = "The percentage of time in which domain shader pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "DsStall";
      counter->category = "3D Pipe/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__so_stall__read;
      counter->name = "SO Stall";
      counter->desc = "The percentage of time in which stream-output pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SoStall";
      counter->category = "3D Pipe/Stream Output";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__cl_stall__read;
      counter->name = "CL Stall";
      counter->desc = "The percentage of time in which clipper pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "ClStall";
      counter->category = "3D Pipe/Clipper";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 268;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__render_pipe_profile__sf_stall__read;
      counter->name = "SF Stall";
      counter->desc = "The percentage of time in which strip-fans pipeline stage was stalled. Unit: percent.";
      counter->symbol_name = "SfStall";
      counter->category = "3D Pipe/Rasterizer/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 272;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_hdc_and_sf_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set HDCAndSF";
   query->symbol_name = "HDCAndSF";
   query->guid = "73acd312-486b-4aa8-bcd1-232bfb29481b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 35);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x14112400 },
         { .reg = 0x00009888, .val = 0x14312400 },
         { .reg = 0x00009888, .val = 0x14512474 },
         { .reg = 0x00009888, .val = 0x14712400 },
         { .reg = 0x00009888, .val = 0x14912400 },
         { .reg = 0x00009888, .val = 0x14B12400 },
         { .reg = 0x00009888, .val = 0x240A0019 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x10110074 },
         { .reg = 0x00009888, .val = 0x08110000 },
         { .reg = 0x00009888, .val = 0x10128000 },
         { .reg = 0x00009888, .val = 0x1C130400 },
         { .reg = 0x00009888, .val = 0x12310074 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x08310000 },
         { .reg = 0x00009888, .val = 0x12328000 },
         { .reg = 0x00009888, .val = 0x1C330800 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x0A510000 },
         { .reg = 0x00009888, .val = 0x14528000 },
         { .reg = 0x00009888, .val = 0x1C531000 },
         { .reg = 0x00009888, .val = 0x16710074 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0A710000 },
         { .reg = 0x00009888, .val = 0x16728000 },
         { .reg = 0x00009888, .val = 0x1C732000 },
         { .reg = 0x00009888, .val = 0x0E910074 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x06910000 },
         { .reg = 0x00009888, .val = 0x0E928000 },
         { .reg = 0x00009888, .val = 0x1C930200 },
         { .reg = 0x00009888, .val = 0x00B10074 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1E0F0020 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A030600 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003488 },
         { .reg = 0x00009888, .val = 0x3800001B },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x24050038 },
         { .reg = 0x00009888, .val = 0x24060080 },
         { .reg = 0x00009888, .val = 0x180A00F7 },
         { .reg = 0x00009888, .val = 0x200A0000 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105000 },
         { .reg = 0x00009888, .val = 0x5D100055 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAA0 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100806 },
         { .reg = 0x00009888, .val = 0x51100408 },
         { .reg = 0x00009888, .val = 0x53100310 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x70800000 },
         { .reg = 0x0000DC40, .val = 0x007F0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00000002 },
         { .reg = 0x0000D964, .val = 0x0000FFEF },
         { .reg = 0x0000DC20, .val = 0x00000002 },
         { .reg = 0x0000DC24, .val = 0x0000FFEF },
         { .reg = 0x0000D968, .val = 0x00000002 },
         { .reg = 0x0000D96C, .val = 0x0000FFDF },
         { .reg = 0x0000DC28, .val = 0x00000002 },
         { .reg = 0x0000DC2C, .val = 0x0000FFDF },
         { .reg = 0x0000D970, .val = 0x00000002 },
         { .reg = 0x0000D974, .val = 0x0000FFBF },
         { .reg = 0x0000DC30, .val = 0x00000002 },
         { .reg = 0x0000DC34, .val = 0x0000FFBF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__hdc_and_sf__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__hdc_and_sf__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__hdc_and_sf__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__hdc_and_sf__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__hdc_and_sf__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__hdc_and_sf__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader00_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice0 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice0) Unit: percent.";
         counter->symbol_name = "NonSamplerShader00AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader01_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice1 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice1) Unit: percent.";
         counter->symbol_name = "NonSamplerShader01AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader02_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice2 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice2) Unit: percent.";
         counter->symbol_name = "NonSamplerShader02AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 224;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader03_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice3 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice3) Unit: percent.";
         counter->symbol_name = "NonSamplerShader03AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 228;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader04_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice4 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice4) Unit: percent.";
         counter->symbol_name = "NonSamplerShader04AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 232;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__hdc_and_sf__non_sampler_shader05_access_stalled_on_l3__read;
         counter->name = "Slice0 Dualsubslice5 Non-sampler Shader Access Stalled On L3";
         counter->desc = "Percentage of time when HDC has messages to L3, but it's stalled due to lack of credits (Slice0 Dualsubslice5) Unit: percent.";
         counter->symbol_name = "NonSamplerShader05AccessStalledOnL3";
         counter->category = "GPU/Data Port";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 236;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__hdc_and_sf__poly_data_ready__read;
      counter->name = "Polygon Data Ready";
      counter->desc = "The percentage of time in which geometry pipeline output is ready Unit: percent.";
      counter->symbol_name = "PolyDataReady";
      counter->category = "GPU/3D Pipe/Strip-Fans";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_rasterizer_and_pixel_backend_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set RasterizerAndPixelBackend";
   query->symbol_name = "RasterizerAndPixelBackend";
   query->guid = "ef158e77-1bca-402c-b2d6-1654908fd977";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 41);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1E075000 },
         { .reg = 0x00009888, .val = 0x1A0700C0 },
         { .reg = 0x00009888, .val = 0x1E055000 },
         { .reg = 0x00009888, .val = 0x1A0500C0 },
         { .reg = 0x00009888, .val = 0x1E065000 },
         { .reg = 0x00009888, .val = 0x1A0600C0 },
         { .reg = 0x00009888, .val = 0x2A0A7300 },
         { .reg = 0x00009888, .val = 0x2C0A0000 },
         { .reg = 0x00009888, .val = 0x120800A0 },
         { .reg = 0x00009888, .val = 0x0A07C000 },
         { .reg = 0x00009888, .val = 0x0E070027 },
         { .reg = 0x00009888, .val = 0x10070000 },
         { .reg = 0x00009888, .val = 0x24070000 },
         { .reg = 0x00009888, .val = 0x2A032000 },
         { .reg = 0x00009888, .val = 0x2C030008 },
         { .reg = 0x00009888, .val = 0x18006000 },
         { .reg = 0x00009888, .val = 0x360036D8 },
         { .reg = 0x00009888, .val = 0x38003299 },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x2A010400 },
         { .reg = 0x00009888, .val = 0x2C010001 },
         { .reg = 0x00009888, .val = 0x0C05C000 },
         { .reg = 0x00009888, .val = 0x00052700 },
         { .reg = 0x00009888, .val = 0x10050000 },
         { .reg = 0x00009888, .val = 0x24050000 },
         { .reg = 0x00009888, .val = 0x22050000 },
         { .reg = 0x00009888, .val = 0x0C0600C0 },
         { .reg = 0x00009888, .val = 0x0E062700 },
         { .reg = 0x00009888, .val = 0x10060000 },
         { .reg = 0x00009888, .val = 0x24060000 },
         { .reg = 0x00009888, .val = 0x26060000 },
         { .reg = 0x00009888, .val = 0x000A0144 },
         { .reg = 0x00009888, .val = 0x0E0A0145 },
         { .reg = 0x00009888, .val = 0x100A0156 },
         { .reg = 0x00009888, .val = 0x040A014F },
         { .reg = 0x00009888, .val = 0x200A0000 },
         { .reg = 0x00009888, .val = 0x120A4000 },
         { .reg = 0x00009888, .val = 0x140A4000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009888, .val = 0x08081980 },
         { .reg = 0x00009888, .val = 0x0A080032 },
         { .reg = 0x00009888, .val = 0x10080000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x31152800 },
         { .reg = 0x00009888, .val = 0x331500A0 },
         { .reg = 0x00009888, .val = 0x31352800 },
         { .reg = 0x00009888, .val = 0x333500A0 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105FA5 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAA0 },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B140A00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07150016 },
         { .reg = 0x00009888, .val = 0x09150096 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03168000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x0B350016 },
         { .reg = 0x00009888, .val = 0x0D350096 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x07364000 },
         { .reg = 0x00009888, .val = 0x47100400 },
         { .reg = 0x00009888, .val = 0x4D100010 },
         { .reg = 0x00009888, .val = 0x4F100404 },
         { .reg = 0x00009888, .val = 0x51100202 },
         { .reg = 0x00009888, .val = 0x53100002 },
         { .reg = 0x00009888, .val = 0x55100204 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49101404 },
         { .reg = 0x00009888, .val = 0x4B101010 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00000038 },
         { .reg = 0x0000D944, .val = 0x0000FFF8 },
         { .reg = 0x0000DC00, .val = 0x00000038 },
         { .reg = 0x0000DC04, .val = 0x0000FFF8 },
         { .reg = 0x0000D948, .val = 0x000000C0 },
         { .reg = 0x0000D94C, .val = 0x0000FFE7 },
         { .reg = 0x0000DC08, .val = 0x000000C0 },
         { .reg = 0x0000DC0C, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__rasterizer_and_pixel_backend__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__rasterized_pixels__read;
      counter->name = "Rasterized Pixels";
      counter->desc = "The total number of rasterized pixels. Unit: pixels.";
      counter->symbol_name = "RasterizedPixels";
      counter->category = "3D Pipe/Rasterizer";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__hi_depth_test_fails__read;
      counter->name = "Early Hi-Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early hierarchical depth test. Unit: pixels.";
      counter->symbol_name = "HiDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Hi-Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__early_depth_test_fails__read;
      counter->name = "Early Depth Test Fails";
      counter->desc = "The total number of pixels dropped on early depth test. Unit: pixels.";
      counter->symbol_name = "EarlyDepthTestFails";
      counter->category = "3D Pipe/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__samples_killed_in_ps__read;
      counter->name = "Samples Killed in FS";
      counter->desc = "The total number of samples or pixels dropped in fragment shaders. Unit: pixels.";
      counter->symbol_name = "SamplesKilledInPs";
      counter->category = "3D Pipe/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 120;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__pixels_failing_post_ps_tests__read;
      counter->name = "Pixels Failing Tests";
      counter->desc = "The total number of pixels dropped on post-FS alpha, stencil, or depth tests. Unit: pixels.";
      counter->symbol_name = "PixelsFailingPostPsTests";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 128;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__samples_written__read;
      counter->name = "Samples Written";
      counter->desc = "The total number of samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesWritten";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 136;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__samples_blended__read;
      counter->name = "Samples Blended";
      counter->desc = "The total number of blended samples or pixels written to all render targets. Unit: pixels.";
      counter->symbol_name = "SamplesBlended";
      counter->category = "3D Pipe/Output Merger";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_PIXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 144;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__sampler_texels__read;
      counter->name = "Sampler Texels";
      counter->desc = "The total number of texels seen on input (with 2x2 accuracy) in all sampler units. Unit: texels.";
      counter->symbol_name = "SamplerTexels";
      counter->category = "Sampler/Sampler Input";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 152;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__sampler_texel_misses__read;
      counter->name = "Sampler Texels Misses";
      counter->desc = "The total number of texels lookups (with 2x2 accuracy) that missed L1 sampler cache. Unit: texels.";
      counter->symbol_name = "SamplerTexelMisses";
      counter->category = "Sampler/Sampler Cache";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_TEXELS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 160;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__slm_bytes_read__read;
      counter->name = "SLM Bytes Read";
      counter->desc = "The total number of GPU memory bytes read from shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesRead";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 168;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__slm_bytes_written__read;
      counter->name = "SLM Bytes Written";
      counter->desc = "The total number of GPU memory bytes written into shared local memory. Unit: bytes.";
      counter->symbol_name = "SlmBytesWritten";
      counter->category = "L3/Data Port/SLM";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 176;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__shader_memory_accesses__read;
      counter->name = "Shader Memory Accesses";
      counter->desc = "The total number of shader memory accesses to L3. Unit: messages.";
      counter->symbol_name = "ShaderMemoryAccesses";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 184;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__shader_atomics__read;
      counter->name = "Shader Atomic Memory Accesses";
      counter->desc = "The total number of shader atomic memory accesses. Unit: messages.";
      counter->symbol_name = "ShaderAtomics";
      counter->category = "L3/Data Port/Atomics";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 192;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__l3_shader_throughput__read;
      counter->name = "L3 Shader Throughput";
      counter->desc = "The total number of GPU memory bytes transferred between shaders and L3 caches w/o URB. Unit: bytes.";
      counter->symbol_name = "L3ShaderThroughput";
      counter->category = "L3/Data Port";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 200;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__rasterizer_and_pixel_backend__shader_barriers__read;
      counter->name = "Shader Barrier Messages";
      counter->desc = "The total number of shader barrier messages. Unit: messages.";
      counter->symbol_name = "ShaderBarriers";
      counter->category = "EU Array/Barrier";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_MESSAGES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 208;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__rasterizer0_input_available__read;
         counter->name = "Slice0 Rasterizer Input Available";
         counter->desc = "The percentage of time in which slice0 rasterizer input is available Unit: percent.";
         counter->symbol_name = "Rasterizer0InputAvailable";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 216;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__rasterizer0_output_ready__read;
         counter->name = "Slice0 Rasterizer Output Ready";
         counter->desc = "The percentage of time in which slice0 rasterizer output is ready Unit: percent.";
         counter->symbol_name = "Rasterizer0OutputReady";
         counter->category = "GPU/Rasterizer";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 220;
      }

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__pixel_data00_ready__read;
      counter->name = "Slice0 Pipe0 Post-EarlyZ Pixel Data Ready";
      counter->desc = "The percentage of time in which slice0  pipe0 post-EarlyZ pixel data is ready (after early Z tests have been applied) Unit: percent.";
      counter->symbol_name = "PixelData00Ready";
      counter->category = "GPU/Rasterizer/Early Depth Test";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 224;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__ps_output00_available__read;
      counter->name = "Slice0 Pipe0 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe0 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput00Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 228;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__ps_output01_available__read;
      counter->name = "Slice0 Pipe1 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe1 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput01Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 232;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__ps_output02_available__read;
      counter->name = "Slice0 Pipe2 PS Output Available";
      counter->desc = "The percentage of time in which slice0 pipe2 PS output is available Unit: percent.";
      counter->symbol_name = "PSOutput02Available";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 236;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__pixel_values00_ready__read;
      counter->name = "Slice0 Pipe0 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe0 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues00Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 240;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__pixel_values01_ready__read;
      counter->name = "Slice0 Pipe1 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe1 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues01Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 244;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__pixel_values02_ready__read;
      counter->name = "Slice0 Pipe2 Pixel Values Ready";
      counter->desc = "The percentage of time in which slice0 pipe2 pixel values are ready Unit: percent.";
      counter->symbol_name = "PixelValues02Ready";
      counter->category = "GPU/3D Pipe";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 248;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__gt_request_queue00_full__read;
      counter->name = "SQ00 is full";
      counter->desc = "The percentage of time when IDI0 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue00Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 252;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__gt_request_queue01_full__read;
      counter->name = "SQ01 is full";
      counter->desc = "The percentage of time when IDI0 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue01Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 256;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__gt_request_queue10_full__read;
      counter->name = "SQ10 is full";
      counter->desc = "The percentage of time when IDI1 SQ0 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue10Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 260;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__rasterizer_and_pixel_backend__gt_request_queue11_full__read;
      counter->name = "SQ11 is full";
      counter->desc = "The percentage of time when IDI1 SQ1 is filled above a threshold (usually 48 entries) Unit: percent.";
      counter->symbol_name = "GTRequestQueue11Full";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 264;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Build the "L3_1" OA metric set description and register it with @perf.
 *
 * A new intel_perf_query_info is allocated as a ralloc child of @perf, given
 * its identity (name, GUID, OA report format), its accumulator layout, the
 * three register programming tables (mux, boolean counter, flex) and up to 17
 * counter descriptions.  The finished query is then inserted into
 * perf->oa_metrics_table, keyed by its GUID string.
 *
 * Thirteen counters are always appended; four more "Slice0 L3 BankN Input
 * Available" counters are appended only when bit 0 of
 * perf->sys_vars.slice_mask is set.
 */
static void
adl_register_l3_1_counter_query(struct intel_perf_config *perf)
{
   /* Register programming tables.  They are immutable and have static
    * storage, so the query only keeps pointers to them. */
   static const struct intel_perf_query_register_prog l3_1_mux[] = {
      { .reg = 0x00000D04, .val = 0x00000200 },
      { .reg = 0x00009840, .val = 0x00000000 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x04022000 },
      { .reg = 0x00009888, .val = 0x06022800 },
      { .reg = 0x00009888, .val = 0x04002827 },
      { .reg = 0x00009888, .val = 0x0600202C },
      { .reg = 0x00009888, .val = 0x00020024 },
      { .reg = 0x00009888, .val = 0x0E020025 },
      { .reg = 0x00009888, .val = 0x10020026 },
      { .reg = 0x00009888, .val = 0x12020027 },
      { .reg = 0x00009888, .val = 0x1402002C },
      { .reg = 0x00009888, .val = 0x1602002D },
      { .reg = 0x00009888, .val = 0x1802002E },
      { .reg = 0x00009888, .val = 0x1A02002F },
      { .reg = 0x00009888, .val = 0x2E020000 },
      { .reg = 0x00009888, .val = 0x2C020000 },
      { .reg = 0x00009888, .val = 0x00034000 },
      { .reg = 0x00009888, .val = 0x1A032000 },
      { .reg = 0x00009888, .val = 0x1C032000 },
      { .reg = 0x00009888, .val = 0x1E032000 },
      { .reg = 0x00009888, .val = 0x2A035500 },
      { .reg = 0x00009888, .val = 0x1C000024 },
      { .reg = 0x00009888, .val = 0x1E000025 },
      { .reg = 0x00009888, .val = 0x02000026 },
      { .reg = 0x00009888, .val = 0x0800002D },
      { .reg = 0x00009888, .val = 0x0A00002E },
      { .reg = 0x00009888, .val = 0x0C00002F },
      { .reg = 0x00009888, .val = 0x360036D8 },
      { .reg = 0x00009888, .val = 0x18006000 },
      { .reg = 0x00009888, .val = 0x380000DB },
      { .reg = 0x00009888, .val = 0x1A000000 },
      { .reg = 0x00009888, .val = 0x34000000 },
      { .reg = 0x00009888, .val = 0x000A8000 },
      { .reg = 0x00009888, .val = 0x0E0A8000 },
      { .reg = 0x00009888, .val = 0x100A8000 },
      { .reg = 0x00009888, .val = 0x120A8000 },
      { .reg = 0x00009888, .val = 0x140A8000 },
      { .reg = 0x00009888, .val = 0x160A8000 },
      { .reg = 0x00009888, .val = 0x180A8000 },
      { .reg = 0x00009888, .val = 0x1A0A8000 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x41104000 },
      { .reg = 0x00009888, .val = 0x5B105555 },
      { .reg = 0x00009888, .val = 0x5D101555 },
      { .reg = 0x00009888, .val = 0x17144000 },
      { .reg = 0x00009888, .val = 0x1D14AAAA },
      { .reg = 0x00009888, .val = 0x1F14002A },
      { .reg = 0x00009888, .val = 0x1B14AA00 },
      { .reg = 0x00009888, .val = 0x01124000 },
      { .reg = 0x00009888, .val = 0x0F124000 },
      { .reg = 0x00009888, .val = 0x11124000 },
      { .reg = 0x00009888, .val = 0x13124000 },
      { .reg = 0x00009888, .val = 0x15124000 },
      { .reg = 0x00009888, .val = 0x17124000 },
      { .reg = 0x00009888, .val = 0x19124000 },
      { .reg = 0x00009888, .val = 0x1B124000 },
      { .reg = 0x00009888, .val = 0x1D124000 },
      { .reg = 0x00009888, .val = 0x1F124000 },
      { .reg = 0x00009888, .val = 0x03124000 },
      { .reg = 0x00009888, .val = 0x05124000 },
      { .reg = 0x00009888, .val = 0x07124000 },
      { .reg = 0x00009888, .val = 0x09124000 },
      { .reg = 0x00009888, .val = 0x0B124000 },
      { .reg = 0x00009888, .val = 0x0D124000 },
      { .reg = 0x00009888, .val = 0x47100000 },
      { .reg = 0x00009888, .val = 0x4D100606 },
      { .reg = 0x00009888, .val = 0x4F100000 },
      { .reg = 0x00009888, .val = 0x51100000 },
      { .reg = 0x00009888, .val = 0x53100000 },
      { .reg = 0x00009888, .val = 0x55100600 },
      { .reg = 0x00009888, .val = 0x17100000 },
      { .reg = 0x00009888, .val = 0x31100000 },
      { .reg = 0x00009888, .val = 0x57100006 },
      { .reg = 0x00009888, .val = 0x49100606 },
      { .reg = 0x00009888, .val = 0x4B100606 },
      { .reg = 0x00009884, .val = 0x00000003 },
      { .reg = 0x00009888, .val = 0x65100002 },
      { .reg = 0x00009884, .val = 0x00000000 },
      { .reg = 0x00009888, .val = 0x42000001 },
   };

   static const struct intel_perf_query_register_prog l3_1_boolean[] = {
      { .reg = 0x0000D920, .val = 0x00000000 },
      { .reg = 0x0000D900, .val = 0x00000000 },
      { .reg = 0x0000D904, .val = 0x00800000 },
      { .reg = 0x0000D910, .val = 0x00000000 },
      { .reg = 0x0000D914, .val = 0x00800000 },
      { .reg = 0x0000DC40, .val = 0x00000000 },
   };

   static const struct intel_perf_query_register_prog l3_1_flex[] = {
      { .reg = 0x0000E458, .val = 0x00804704 },
      { .reg = 0x0000E558, .val = 0x00A04904 },
      { .reg = 0x0000E658, .val = 0x00805705 },
      { .reg = 0x0000E758, .val = 0x00A05905 },
      { .reg = 0x0000E45C, .val = 0x00808708 },
      { .reg = 0x0000E55C, .val = 0x00A08908 },
   };

   struct intel_perf_query_info *info = rzalloc(perf, struct intel_perf_query_info);

   /* Identity of the metric set. */
   info->perf = perf;
   info->kind = INTEL_PERF_QUERY_TYPE_OA;
   info->name = "L3_1";
   info->symbol_name = "L3_1";
   info->guid = "b997e963-d322-4d97-b631-f875b2135fc9";
   info->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   info->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   /* Room for every counter this set can expose (13 fixed + 4 optional). */
   info->counters = rzalloc_array(info, struct intel_perf_query_counter, 17);
   info->n_counters = 0;

   /* Accumulation buffer offsets: each section starts right after the
    * previous one (sizes 1, 1, 36, 8, 8 and 2 entries respectively). */
   info->gpu_time_offset = 0;
   info->gpu_clock_offset = info->gpu_time_offset + 1;
   info->a_offset = info->gpu_clock_offset + 1;
   info->b_offset = info->a_offset + 36;
   info->c_offset = info->b_offset + 8;
   info->perfcnt_offset = info->c_offset + 8;
   info->rpstat_offset = info->perfcnt_offset + 2;

   struct intel_perf_query_counter *ctr = info->counters;

   /* NOTE(review): info was allocated just above; assuming rzalloc()
    * zero-fills (TODO confirm), data_size is still 0 here and this branch
    * is always taken. */
   if (!info->data_size) {
      info->config.mux_regs = l3_1_mux;
      info->config.n_mux_regs = ARRAY_SIZE(l3_1_mux);

      info->config.b_counter_regs = l3_1_boolean;
      info->config.n_b_counter_regs = ARRAY_SIZE(l3_1_boolean);

      info->config.flex_regs = l3_1_flex;
      info->config.n_flex_regs = ARRAY_SIZE(l3_1_flex);

      /* --- Counters that are always exposed ---
       * `offset` is the counter's byte position in the query result:
       * uint64 counters are spaced 8 bytes apart, float counters 4. */

      ctr = &info->counters[info->n_counters++];
      ctr->name = "GPU Time Elapsed";
      ctr->symbol_name = "GpuTime";
      ctr->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_NS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 0;
      ctr->oa_counter_read_uint64 = adl__l3_1__gpu_time__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "GPU Core Clocks";
      ctr->symbol_name = "GpuCoreClocks";
      ctr->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 8;
      ctr->oa_counter_read_uint64 = adl__l3_1__gpu_core_clocks__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "AVG GPU Core Frequency";
      ctr->symbol_name = "AvgGpuCoreFrequency";
      ctr->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_HZ;
      /* The only counter in this set whose maximum is computed from perf. */
      ctr->raw_max = adl__l3_1__avg_gpu_core_frequency__max(perf);
      ctr->offset = 16;
      ctr->oa_counter_read_uint64 = adl__l3_1__avg_gpu_core_frequency__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "GPU Busy";
      ctr->symbol_name = "GpuBusy";
      ctr->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      ctr->category = "GPU";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 24;
      ctr->oa_counter_read_float = adl__l3_1__gpu_busy__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "EU Active";
      ctr->symbol_name = "EuActive";
      ctr->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 28;
      ctr->oa_counter_read_float = adl__l3_1__eu_active__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "EU Stall";
      ctr->symbol_name = "EuStall";
      ctr->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 32;
      ctr->oa_counter_read_float = adl__l3_1__eu_stall__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "EU Thread Occupancy";
      ctr->symbol_name = "EuThreadOccupancy";
      ctr->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      ctr->category = "EU Array";
      ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      ctr->raw_max = 100.0;
      ctr->offset = 36;
      ctr->oa_counter_read_float = adl__l3_1__eu_thread_occupancy__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "VS Threads Dispatched";
      ctr->symbol_name = "VsThreads";
      ctr->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Vertex Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 40;
      ctr->oa_counter_read_uint64 = adl__l3_1__vs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "HS Threads Dispatched";
      ctr->symbol_name = "HsThreads";
      ctr->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Hull Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 48;
      ctr->oa_counter_read_uint64 = adl__l3_1__hs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "DS Threads Dispatched";
      ctr->symbol_name = "DsThreads";
      ctr->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Domain Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 56;
      ctr->oa_counter_read_uint64 = adl__l3_1__ds_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "GS Threads Dispatched";
      ctr->symbol_name = "GsThreads";
      ctr->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Geometry Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 64;
      ctr->oa_counter_read_uint64 = adl__l3_1__gs_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "FS Threads Dispatched";
      ctr->symbol_name = "PsThreads";
      ctr->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Fragment Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 72;
      ctr->oa_counter_read_uint64 = adl__l3_1__ps_threads__read;

      ctr = &info->counters[info->n_counters++];
      ctr->name = "CS Threads Dispatched";
      ctr->symbol_name = "CsThreads";
      ctr->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      ctr->category = "EU Array/Compute Shader";
      ctr->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      ctr->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      ctr->raw_max = 0 /* undefined */;
      ctr->offset = 80;
      ctr->oa_counter_read_uint64 = adl__l3_1__cs_threads__read;

      /* --- Counters exposed only when bit 0 of slice_mask is set ---
       * All four share the same condition, so they are appended together. */
      if (perf->sys_vars.slice_mask & 1) {
         ctr = &info->counters[info->n_counters++];
         ctr->name = "Slice0 L3 Bank0 Input Available";
         ctr->symbol_name = "L30Bank0InputAvailable";
         ctr->desc = "The percentage of time in which slice0 L3 bank0 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 88;
         ctr->oa_counter_read_float = adl__l3_1__l30_bank0_input_available__read;

         ctr = &info->counters[info->n_counters++];
         ctr->name = "Slice0 L3 Bank1 Input Available";
         ctr->symbol_name = "L30Bank1InputAvailable";
         ctr->desc = "The percentage of time in which slice0 L3 bank1 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 92;
         ctr->oa_counter_read_float = adl__l3_1__l30_bank1_input_available__read;

         ctr = &info->counters[info->n_counters++];
         ctr->name = "Slice0 L3 Bank4 Input Available";
         ctr->symbol_name = "L30Bank4InputAvailable";
         ctr->desc = "The percentage of time in which slice0 L3 bank4 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 96;
         ctr->oa_counter_read_float = adl__l3_1__l30_bank4_input_available__read;

         ctr = &info->counters[info->n_counters++];
         ctr->name = "Slice0 L3 Bank5 Input Available";
         ctr->symbol_name = "L30Bank5InputAvailable";
         ctr->desc = "The percentage of time in which slice0 L3 bank5 has input available Unit: percent.";
         ctr->category = "GTI/L3";
         ctr->type = INTEL_PERF_COUNTER_TYPE_RAW;
         ctr->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         ctr->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         ctr->raw_max = 100.0;
         ctr->offset = 100;
         ctr->oa_counter_read_float = adl__l3_1__l30_bank5_input_available__read;
      }

      /* ctr is the last counter appended above; the result buffer ends
       * right after it. */
      info->data_size = ctr->offset + intel_perf_query_counter_get_size(ctr);
   }

   /* Publish the query so it can be looked up by GUID. */
   _mesa_hash_table_insert(perf->oa_metrics_table, info->guid, info);
}


static void
adl_register_l3_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_2";
   query->symbol_name = "L3_2";
   query->guid = "57c490ef-4993-465e-b1e0-774fbc104fdf";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 17);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04023027 },
         { .reg = 0x00009888, .val = 0x0602382C },
         { .reg = 0x00009888, .val = 0x04003000 },
         { .reg = 0x00009888, .val = 0x06003800 },
         { .reg = 0x00009888, .val = 0x1C020024 },
         { .reg = 0x00009888, .val = 0x1E020025 },
         { .reg = 0x00009888, .val = 0x02020026 },
         { .reg = 0x00009888, .val = 0x0802002D },
         { .reg = 0x00009888, .val = 0x0A02002E },
         { .reg = 0x00009888, .val = 0x0C02002F },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x00000024 },
         { .reg = 0x00009888, .val = 0x0E000025 },
         { .reg = 0x00009888, .val = 0x10000026 },
         { .reg = 0x00009888, .val = 0x12000027 },
         { .reg = 0x00009888, .val = 0x1400002C },
         { .reg = 0x00009888, .val = 0x1600002D },
         { .reg = 0x00009888, .val = 0x1800002E },
         { .reg = 0x00009888, .val = 0x1A00602F },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAAA },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B14AA00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F100606 },
         { .reg = 0x00009888, .val = 0x51100606 },
         { .reg = 0x00009888, .val = 0x53100606 },
         { .reg = 0x00009888, .val = 0x55100006 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__l3_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_2__l30_bank2_input_available__read;
         counter->name = "Slice0 L3 Bank2 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank2 has input available Unit: percent.";
         counter->symbol_name = "L30Bank2InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_2__l30_bank3_input_available__read;
         counter->name = "Slice0 L3 Bank3 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank3 has input available Unit: percent.";
         counter->symbol_name = "L30Bank3InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_2__l30_bank6_input_available__read;
         counter->name = "Slice0 L3 Bank6 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank6 has input available Unit: percent.";
         counter->symbol_name = "L30Bank6InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_2__l30_bank7_input_available__read;
         counter->name = "Slice0 L3 Bank7 Input Available";
         counter->desc = "The percentage of time in which slice0 L3 bank7 has input available Unit: percent.";
         counter->symbol_name = "L30Bank7InputAvailable";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_l3_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_3";
   query->symbol_name = "L3_3";
   query->guid = "50d02e16-414e-4b4c-adbd-71c584f857b5";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04020200 },
         { .reg = 0x00009888, .val = 0x06020020 },
         { .reg = 0x00009888, .val = 0x04000200 },
         { .reg = 0x00009888, .val = 0x06000000 },
         { .reg = 0x00009888, .val = 0x08020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x06034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A000020 },
         { .reg = 0x00009888, .val = 0x0C000028 },
         { .reg = 0x00009888, .val = 0x36000000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34000300 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x1B14A000 },
         { .reg = 0x00009888, .val = 0x1D14000A },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100000 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__l3_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_3__l30_bank0_output_ready__read;
         counter->name = "Slice0 L3 Bank0 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank0 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank0OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_3__l30_bank4_output_ready__read;
         counter->name = "Slice0 L3 Bank4 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank4 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank4OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_l3_4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_4";
   query->symbol_name = "L3_4";
   query->guid = "896dda0d-84b8-4d1d-b54b-c2e5ded94581";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04020A00 },
         { .reg = 0x00009888, .val = 0x06020800 },
         { .reg = 0x00009888, .val = 0x04000A00 },
         { .reg = 0x00009888, .val = 0x06000820 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x1B14A000 },
         { .reg = 0x00009888, .val = 0x1D14000A },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__l3_4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_4__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_4__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_4__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_4__l30_bank1_output_ready__read;
         counter->name = "Slice0 L3 Bank1 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank1 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank1OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_4__l30_bank5_output_ready__read;
         counter->name = "Slice0 L3 Bank5 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank5 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank5OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the ADL "L3_5" OA metric set with \p perf.
 *
 * Allocates a query description parented to \p perf, fills in the OA report
 * format, the accumulation-buffer offsets, the mux / B-counter / flex
 * register programming and the counter table, and finally inserts the query
 * into perf->oa_metrics_table keyed by its GUID.
 *
 * If either allocation fails the metric set is skipped and nothing is
 * inserted into the table.
 *
 * \param perf  perf configuration that owns the query and its metrics table.
 */
static void
adl_register_l3_5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);
   if (!query)
      return; /* out of memory: leave this metric set unregistered */

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_5";
   query->symbol_name = "L3_5";
   query->guid = "f5e936f0-aacb-4aea-80a5-d434f21d427d";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   if (!query->counters) {
      /* The query is never published.  It was allocated with perf as its
       * ralloc context, so it is expected to be released together with
       * perf rather than leaked. */
      return;
   }
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The query was obtained from rzalloc() just above, so data_size is
    * expected to still be zero here and this block runs on every call.
    * The register tables are static const, so only the pointers to them
    * are stored in the query. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021200 },
         { .reg = 0x00009888, .val = 0x06021000 },
         { .reg = 0x00009888, .val = 0x04001200 },
         { .reg = 0x00009888, .val = 0x06001020 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x1B14A000 },
         { .reg = 0x00009888, .val = 0x1D14000A },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* Counter table.  Each entry claims the next free slot in
       * query->counters; offset is the counter's byte position in the
       * query result data (see the data_size computation below). */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__l3_5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_5__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_5__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_5__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The two slice-0 L3 bank counters are only registered when bit 0 of
       * the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_5__l30_bank2_output_ready__read;
         counter->name = "Slice0 L3 Bank2 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank2 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank2OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;

         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_5__l30_bank6_output_ready__read;
         counter->name = "Slice0 L3 Bank6 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank6 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank6OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      /* counter now points at the last entry that was actually registered,
       * so the result data ends right after it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Register the ADL "L3_6" OA metric set with \p perf.
 *
 * Allocates a query description parented to \p perf, fills in the OA report
 * format, the accumulation-buffer offsets, the mux / B-counter / flex
 * register programming and the counter table, and finally inserts the query
 * into perf->oa_metrics_table keyed by its GUID.
 *
 * If either allocation fails the metric set is skipped and nothing is
 * inserted into the table.
 *
 * \param perf  perf configuration that owns the query and its metrics table.
 */
static void
adl_register_l3_6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);
   if (!query)
      return; /* out of memory: leave this metric set unregistered */

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "L3_6";
   query->symbol_name = "L3_6";
   query->guid = "c9b51681-9e54-49b0-918d-4e588078d5cd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 15);
   if (!query->counters) {
      /* The query is never published.  It was allocated with perf as its
       * ralloc context, so it is expected to be released together with
       * perf rather than leaked. */
      return;
   }
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* The query was obtained from rzalloc() just above, so data_size is
    * expected to still be zero here and this block runs on every call.
    * The register tables are static const, so only the pointers to them
    * are stored in the query. */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x04021A00 },
         { .reg = 0x00009888, .val = 0x06021800 },
         { .reg = 0x00009888, .val = 0x04001A00 },
         { .reg = 0x00009888, .val = 0x06001820 },
         { .reg = 0x00009888, .val = 0x0A020020 },
         { .reg = 0x00009888, .val = 0x0C020028 },
         { .reg = 0x00009888, .val = 0x2E020000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x18032000 },
         { .reg = 0x00009888, .val = 0x08000028 },
         { .reg = 0x00009888, .val = 0x36000003 },
         { .reg = 0x00009888, .val = 0x1E000000 },
         { .reg = 0x00009888, .val = 0x34001800 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5B100550 },
         { .reg = 0x00009888, .val = 0x1B14A000 },
         { .reg = 0x00009888, .val = 0x1D14000A },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x00800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00000000 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);

      /* Counter table.  Each entry claims the next free slot in
       * query->counters; offset is the counter's byte position in the
       * query result data (see the data_size computation below). */

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__l3_6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__l3_6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_6__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_6__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__l3_6__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* The two slice-0 L3 bank counters are only registered when bit 0 of
       * the slice mask is set. */
      if (perf->sys_vars.slice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_6__l30_bank3_output_ready__read;
         counter->name = "Slice0 L3 Bank3 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank3 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank3OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;

         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__l3_6__l30_bank7_output_ready__read;
         counter->name = "Slice0 L3 Bank7 Output Ready";
         counter->desc = "The percentage of time in which slice0 L3 bank7 output is ready Unit: percent.";
         counter->symbol_name = "L30Bank7OutputReady";
         counter->category = "GTI/L3";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      /* counter now points at the last entry that was actually registered,
       * so the result data ends right after it. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/*
 * Registers the "Sampler_1" OA metric set with `perf`.
 *
 * The function allocates a new intel_perf_query_info (rzalloc with `perf`
 * as its first argument), fills in its identity (name, GUID, OA report
 * format), the accumulation-buffer offsets, three register programming
 * tables (mux, B-counter, flex) and the list of counters, and finally
 * inserts the query into perf->oa_metrics_table keyed by its GUID string.
 *
 * Thirteen counters are always registered. Six more (the per-dualsubslice
 * "Input Available" counters) are added only when the matching bit (0..5)
 * of perf->sys_vars.subslice_mask is set, so n_counters ends up anywhere
 * between 13 and 19.
 *
 * perf: configuration that receives the query; it is also handed to the
 *       AvgGpuCoreFrequency raw_max helper.
 */
static void
adl_register_sampler_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_1";
   query->symbol_name = "Sampler_1";
   query->guid = "c4a47a95-b80a-46d3-abf0-98a95a840407";
   /* Capacity 19 = 13 unconditional counters + 6 subslice-gated ones below. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each group starts right after the previous one; the resulting indices
    * are gpu_time 0, gpu_clock 1, a 2, b 38, c 46, perfcnt 54, rpstat 56.
    * NOTE(review): the 36/8/8 strides presumably mirror the A32+A4, B8 and
    * C8 parts of the OA format name above -- confirm. */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor into the counter array; it is re-pointed at the next free slot
    * before each counter is filled in. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */
   /* NOTE(review): `query` is allocated a few lines above on every call
    * rather than being a global, and data_size is not written before this
    * test; presumably rzalloc zero-fills, making the condition always true
    * here -- confirm. */

   if (!query->data_size) {
      /* Mux programming as ordered (register, value) pairs. The table is
       * static const, so the query only stores a pointer to it plus the
       * entry count. Most entries write different values to 0x00009888. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x1C121600 },
         { .reg = 0x00009888, .val = 0x18141600 },
         { .reg = 0x00009888, .val = 0x1C325600 },
         { .reg = 0x00009888, .val = 0x18341600 },
         { .reg = 0x00009888, .val = 0x1C521600 },
         { .reg = 0x00009888, .val = 0x185416A6 },
         { .reg = 0x00009888, .val = 0x1C721600 },
         { .reg = 0x00009888, .val = 0x18741600 },
         { .reg = 0x00009888, .val = 0x1C921600 },
         { .reg = 0x00009888, .val = 0x18941600 },
         { .reg = 0x00009888, .val = 0x1CB21600 },
         { .reg = 0x00009888, .val = 0x18B41600 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x04120086 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x021400A6 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320086 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C3400A6 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520086 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720086 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x147400A6 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920086 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x109400A6 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20086 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B400A6 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003248 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAA0 },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B140A00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F10060A },
         { .reg = 0x00009888, .val = 0x5110000A },
         { .reg = 0x00009888, .val = 0x53100404 },
         { .reg = 0x00009888, .val = 0x55101808 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x49100118 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter programming, again as ordered (register, value) pairs. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming (six entries). */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions. Each entry claims the next slot in
       * query->counters (bumping n_counters) and records its read
       * callback, display strings, type, data type, units, raw_max and
       * offset. The UINT64 counters below use offsets 0..72 in steps of 8
       * and the FLOAT counters use offsets 80..112 in steps of 4.
       * NOTE(review): the offset is presumably a byte position in the
       * query's result data, given how data_size is derived at the end of
       * this block -- confirm. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time from `perf` instead of being a literal. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__sampler_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Note: declared FLOAT but placed at offset 24 with the next counter
       * at 32, i.e. it occupies an 8-byte-spaced slot among the UINT64s. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* Display name says "FS" (fragment shader) while the symbol name and
       * read callback use "Ps"/"ps". */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* From here on every counter is a FLOAT percentage with raw_max
       * 100.0, and offsets advance by 4. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* Per-dualsubslice sampler counters. Each one is registered only if
       * its bit in perf->sys_vars.subslice_mask is set (bit N for
       * "DualSubsliceN"). The offsets are fixed literals, so a counter
       * that is skipped leaves its offset unused rather than shifting the
       * ones that follow. */
      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler00_input_available__read;
         counter->name = "Slice0 DualSubslice0 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler00InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler01_input_available__read;
         counter->name = "Slice0 DualSubslice1 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler01InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler02_input_available__read;
         counter->name = "Slice0 DualSubslice2 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler02InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler03_input_available__read;
         counter->name = "Slice0 DualSubslice3 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler03InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler04_input_available__read;
         counter->name = "Slice0 DualSubslice4 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler04InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_1__sampler05_input_available__read;
         counter->name = "Slice0 DualSubslice5 Input Available";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler input is available Unit: percent.";
         counter->symbol_name = "Sampler05InputAvailable";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      /* `counter` still points at the last counter that was actually
       * registered, so the total size is that counter's offset plus its
       * size and therefore depends on which gated counters were added. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the query in the OA metrics table, keyed by its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_sampler_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Sampler_2";
   query->symbol_name = "Sampler_2";
   query->guid = "2d7c9960-0ec0-4aa2-9bd0-e6d76f214bf7";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0C123E00 },
         { .reg = 0x00009888, .val = 0x04143E00 },
         { .reg = 0x00009888, .val = 0x0C323E00 },
         { .reg = 0x00009888, .val = 0x04343E00 },
         { .reg = 0x00009888, .val = 0x0C523E00 },
         { .reg = 0x00009888, .val = 0x04543E00 },
         { .reg = 0x00009888, .val = 0x0C723E00 },
         { .reg = 0x00009888, .val = 0x04743E00 },
         { .reg = 0x00009888, .val = 0x0C923E00 },
         { .reg = 0x00009888, .val = 0x04943E00 },
         { .reg = 0x00009888, .val = 0x0CB23E00 },
         { .reg = 0x00009888, .val = 0x04B43E00 },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x04120033 },
         { .reg = 0x00009888, .val = 0x20120000 },
         { .reg = 0x00009888, .val = 0x02124000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x02140013 },
         { .reg = 0x00009888, .val = 0x10140000 },
         { .reg = 0x00009888, .val = 0x00140000 },
         { .reg = 0x00009888, .val = 0x1A150020 },
         { .reg = 0x00009888, .val = 0x1E320033 },
         { .reg = 0x00009888, .val = 0x20320000 },
         { .reg = 0x00009888, .val = 0x1C324000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x1C340013 },
         { .reg = 0x00009888, .val = 0x10340000 },
         { .reg = 0x00009888, .val = 0x0E340000 },
         { .reg = 0x00009888, .val = 0x1C358000 },
         { .reg = 0x00009888, .val = 0x1A520033 },
         { .reg = 0x00009888, .val = 0x20520000 },
         { .reg = 0x00009888, .val = 0x18524000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x18540013 },
         { .reg = 0x00009888, .val = 0x10540000 },
         { .reg = 0x00009888, .val = 0x0C540000 },
         { .reg = 0x00009888, .val = 0x1C550800 },
         { .reg = 0x00009888, .val = 0x16720033 },
         { .reg = 0x00009888, .val = 0x20720000 },
         { .reg = 0x00009888, .val = 0x14724000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x14740013 },
         { .reg = 0x00009888, .val = 0x10740000 },
         { .reg = 0x00009888, .val = 0x0A740000 },
         { .reg = 0x00009888, .val = 0x1C750080 },
         { .reg = 0x00009888, .val = 0x12920033 },
         { .reg = 0x00009888, .val = 0x20920000 },
         { .reg = 0x00009888, .val = 0x10924000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x10940013 },
         { .reg = 0x00009888, .val = 0x08940000 },
         { .reg = 0x00009888, .val = 0x1C950008 },
         { .reg = 0x00009888, .val = 0x0EB20033 },
         { .reg = 0x00009888, .val = 0x20B20000 },
         { .reg = 0x00009888, .val = 0x00B24000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30200 },
         { .reg = 0x00009888, .val = 0x00B40013 },
         { .reg = 0x00009888, .val = 0x10B40000 },
         { .reg = 0x00009888, .val = 0x18B58000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003248 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105005 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAA0 },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B140A00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100000 },
         { .reg = 0x00009888, .val = 0x4F10060A },
         { .reg = 0x00009888, .val = 0x5110000A },
         { .reg = 0x00009888, .val = 0x53100404 },
         { .reg = 0x00009888, .val = 0x55101808 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100001 },
         { .reg = 0x00009888, .val = 0x49100118 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x30800000 },
         { .reg = 0x0000DC40, .val = 0x003F0000 },
         { .reg = 0x0000D940, .val = 0x00000018 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000018 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000060 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000060 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000180 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000180 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000600 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000600 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00001800 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00001800 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00006000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00006000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register programming table installed as query->config.flex_regs:
       * six (.reg, .val) pairs, stored by pointer along with the entry count
       * from ARRAY_SIZE().
       *
       * NOTE(review): presumably the flexible EU counter configuration for
       * this metric set, going by the field name -- confirm against the
       * generator input. Generated by gen_perf.py; do not hand-edit values.
       */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      /* Hand the table to the query config by pointer, with its length. */
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__sampler_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__sampler_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__sampler_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler00_output_ready__read;
         counter->name = "Slice0 DualSubslice0 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice0 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler00OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler01_output_ready__read;
         counter->name = "Slice0 DualSubslice1 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice1 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler01OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler02_output_ready__read;
         counter->name = "Slice0 DualSubslice2 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice2 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler02OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler03_output_ready__read;
         counter->name = "Slice0 DualSubslice3 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice3 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler03OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler04_output_ready__read;
         counter->name = "Slice0 DualSubslice4 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice4 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler04OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__sampler_2__sampler05_output_ready__read;
         counter->name = "Slice0 DualSubslice5 Sampler Output Ready";
         counter->desc = "The percentage of time in which slice0 dualsubslice5 sampler output is ready Unit: percent.";
         counter->symbol_name = "Sampler05OutputReady";
         counter->category = "GPU/Sampler";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_tdl_1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_1";
   query->symbol_name = "TDL_1";
   query->guid = "85afcb2a-90a8-4b1a-adc3-61e322884520";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 29);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      /* Register programming table installed as query->config.mux_regs.
       *
       * Each entry is a (.reg, .val) pair. The table is `static const`, and
       * the query only stores a pointer to it together with the entry count
       * computed by ARRAY_SIZE().
       *
       * Almost every entry targets the same register offset (0x00009888)
       * with a different value, with occasional writes to 0x00009884 in
       * between. Because one register is written repeatedly, the order of
       * the entries is presumably significant: do not sort, merge or
       * de-duplicate them.
       *
       * NOTE(review): going by the field name this is presumably the
       * multiplexer (signal routing) configuration for the TDL_1 metric set
       * -- confirm against the generator input. The file is produced by
       * gen_perf.py, so lasting changes belong in the generator's input, not
       * here.
       */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2611001C },
         { .reg = 0x00009888, .val = 0x2631001C },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x24070002 },
         { .reg = 0x00009888, .val = 0x16110103 },
         { .reg = 0x00009888, .val = 0x1C110104 },
         { .reg = 0x00009888, .val = 0x1E110105 },
         { .reg = 0x00009888, .val = 0x02110106 },
         { .reg = 0x00009888, .val = 0x04110107 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x0A110000 },
         { .reg = 0x00009888, .val = 0x0E110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x16128000 },
         { .reg = 0x00009888, .val = 0x1C128000 },
         { .reg = 0x00009888, .val = 0x1E128000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x1C132000 },
         { .reg = 0x00009888, .val = 0x1E130003 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x14310103 },
         { .reg = 0x00009888, .val = 0x06310104 },
         { .reg = 0x00009888, .val = 0x08310105 },
         { .reg = 0x00009888, .val = 0x0A310106 },
         { .reg = 0x00009888, .val = 0x0C310107 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x02310000 },
         { .reg = 0x00009888, .val = 0x04310000 },
         { .reg = 0x00009888, .val = 0x14328000 },
         { .reg = 0x00009888, .val = 0x06328000 },
         { .reg = 0x00009888, .val = 0x08328000 },
         { .reg = 0x00009888, .val = 0x0A328000 },
         { .reg = 0x00009888, .val = 0x0C328000 },
         { .reg = 0x00009888, .val = 0x1C331100 },
         { .reg = 0x00009888, .val = 0x16338000 },
         { .reg = 0x00009888, .val = 0x18338000 },
         { .reg = 0x00009888, .val = 0x1A338000 },
         { .reg = 0x00009888, .val = 0x12510103 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x08510000 },
         { .reg = 0x00009888, .val = 0x12528000 },
         { .reg = 0x00009888, .val = 0x1C530800 },
         { .reg = 0x00009888, .val = 0x10710103 },
         { .reg = 0x00009888, .val = 0x08710000 },
         { .reg = 0x00009888, .val = 0x10728000 },
         { .reg = 0x00009888, .val = 0x1C730400 },
         { .reg = 0x00009888, .val = 0x0E910103 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x06910000 },
         { .reg = 0x00009888, .val = 0x0E928000 },
         { .reg = 0x00009888, .val = 0x1C930200 },
         { .reg = 0x00009888, .val = 0x00B10103 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1C0F8000 },
         { .reg = 0x00009888, .val = 0x10104000 },
         { .reg = 0x00009888, .val = 0x10024000 },
         { .reg = 0x00009888, .val = 0x1C032000 },
         { .reg = 0x00009888, .val = 0x1E034000 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x360026CA },
         { .reg = 0x00009888, .val = 0x38002402 },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x1E004000 },
         { .reg = 0x00009888, .val = 0x34001200 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x240582C0 },
         { .reg = 0x00009888, .val = 0x26050002 },
         { .reg = 0x00009888, .val = 0x2205FFA0 },
         { .reg = 0x00009888, .val = 0x24060020 },
         { .reg = 0x00009888, .val = 0x100A8000 },
         { .reg = 0x00009888, .val = 0x120A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101415 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D142AAA },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1B14AA00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100808 },
         { .reg = 0x00009888, .val = 0x4F100006 },
         { .reg = 0x00009888, .val = 0x51100804 },
         { .reg = 0x00009888, .val = 0x53100008 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x55100800 },
         { .reg = 0x00009888, .val = 0x57100008 },
         { .reg = 0x00009888, .val = 0x49100808 },
         { .reg = 0x00009888, .val = 0x4B100808 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      /* Hand the table to the query config by pointer, with its length. */
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register programming table installed as query->config.b_counter_regs.
       *
       * Each entry is a (.reg, .val) pair. The table is `static const`, and
       * the query only stores a pointer to it together with the entry count
       * computed by ARRAY_SIZE().
       *
       * Layout visible in the data: six leading entries, then groups of four
       * in which a 0xD9xx register pair is followed by a 0xDCxx pair carrying
       * the same two values.
       *
       * NOTE(review): judging by the field name this is presumably the
       * B-counter (boolean counter) configuration for the TDL_1 metric set
       * -- confirm against the generator input. The file is produced by
       * gen_perf.py, so lasting changes belong in the generator's input, not
       * here.
       */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000002 },
         { .reg = 0x0000D944, .val = 0x0000FFFE },
         { .reg = 0x0000DC00, .val = 0x00000002 },
         { .reg = 0x0000DC04, .val = 0x0000FFFE },
         { .reg = 0x0000D948, .val = 0x00000002 },
         { .reg = 0x0000D94C, .val = 0x0000FFFD },
         { .reg = 0x0000DC08, .val = 0x00000002 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFD },
         { .reg = 0x0000D950, .val = 0x00000002 },
         { .reg = 0x0000D954, .val = 0x0000FFFB },
         { .reg = 0x0000DC10, .val = 0x00000002 },
         { .reg = 0x0000DC14, .val = 0x0000FFFB },
         { .reg = 0x0000D958, .val = 0x00000002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00000002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00000002 },
         { .reg = 0x0000D964, .val = 0x0000FFEF },
         { .reg = 0x0000DC20, .val = 0x00000002 },
         { .reg = 0x0000DC24, .val = 0x0000FFEF },
         { .reg = 0x0000D968, .val = 0x00000002 },
         { .reg = 0x0000D96C, .val = 0x0000FFDF },
         { .reg = 0x0000DC28, .val = 0x00000002 },
         { .reg = 0x0000DC2C, .val = 0x0000FFDF },
         { .reg = 0x0000D970, .val = 0x00007800 },
         { .reg = 0x0000D974, .val = 0x0000F0FF },
         { .reg = 0x0000DC30, .val = 0x00007800 },
         { .reg = 0x0000DC34, .val = 0x0000F0FF },
         { .reg = 0x0000D978, .val = 0x00078000 },
         { .reg = 0x0000D97C, .val = 0x00000FFF },
         { .reg = 0x0000DC38, .val = 0x00078000 },
         { .reg = 0x0000DC3C, .val = 0x00000FFF },
      };
      /* Hand the table to the query config by pointer, with its length. */
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register programming table installed as query->config.flex_regs:
       * six (.reg, .val) pairs, stored by pointer along with the entry count
       * from ARRAY_SIZE().
       *
       * NOTE(review): presumably the flexible EU counter configuration for
       * the TDL_1 metric set, going by the field name -- confirm against the
       * generator input. Generated by gen_perf.py; do not hand-edit values.
       */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      /* Hand the table to the query config by pointer, with its length. */
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__tdl_1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_1__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_1__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_1__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread00_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread01_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread02_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread03_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread04_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__non_ps_thread05_ready_for_dispatch__read;
         counter->name = "Non-PS Thread Ready For Dispatch on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which non-PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "NonPSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header00_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header00_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header00_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header00_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader00ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header01_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header01_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header01_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header01_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader01ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header00_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader00Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_1__thread_header01_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader01Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 152;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_tdl_2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_2";
   query->symbol_name = "TDL_2";
   query->guid = "780e4dd7-8360-4294-9ff6-3bef25e0fe09";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 24);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x24110340 },
         { .reg = 0x00009888, .val = 0x24310340 },
         { .reg = 0x00009888, .val = 0x24510340 },
         { .reg = 0x00009888, .val = 0x24710340 },
         { .reg = 0x00009888, .val = 0x24910340 },
         { .reg = 0x00009888, .val = 0x24B10340 },
         { .reg = 0x00009888, .val = 0x26B1001C },
         { .reg = 0x00009888, .val = 0x1C07C000 },
         { .reg = 0x00009888, .val = 0x2407002B },
         { .reg = 0x00009888, .val = 0x2207FF00 },
         { .reg = 0x00009888, .val = 0x021100F3 },
         { .reg = 0x00009888, .val = 0x041100F2 },
         { .reg = 0x00009888, .val = 0x10110000 },
         { .reg = 0x00009888, .val = 0x00110000 },
         { .reg = 0x00009888, .val = 0x02128000 },
         { .reg = 0x00009888, .val = 0x04128000 },
         { .reg = 0x00009888, .val = 0x12138000 },
         { .reg = 0x00009888, .val = 0x14138000 },
         { .reg = 0x00009888, .val = 0x1C3100F3 },
         { .reg = 0x00009888, .val = 0x1E3100F2 },
         { .reg = 0x00009888, .val = 0x10310000 },
         { .reg = 0x00009888, .val = 0x0E310000 },
         { .reg = 0x00009888, .val = 0x1C328000 },
         { .reg = 0x00009888, .val = 0x1E328000 },
         { .reg = 0x00009888, .val = 0x1E330003 },
         { .reg = 0x00009888, .val = 0x185100F3 },
         { .reg = 0x00009888, .val = 0x1A5100F2 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x0C510000 },
         { .reg = 0x00009888, .val = 0x18528000 },
         { .reg = 0x00009888, .val = 0x1A528000 },
         { .reg = 0x00009888, .val = 0x1C53C000 },
         { .reg = 0x00009888, .val = 0x147100F3 },
         { .reg = 0x00009888, .val = 0x167100F2 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0A710000 },
         { .reg = 0x00009888, .val = 0x14728000 },
         { .reg = 0x00009888, .val = 0x16728000 },
         { .reg = 0x00009888, .val = 0x1C733000 },
         { .reg = 0x00009888, .val = 0x109100F3 },
         { .reg = 0x00009888, .val = 0x129100F2 },
         { .reg = 0x00009888, .val = 0x08910000 },
         { .reg = 0x00009888, .val = 0x10928000 },
         { .reg = 0x00009888, .val = 0x12928000 },
         { .reg = 0x00009888, .val = 0x1C930C00 },
         { .reg = 0x00009888, .val = 0x00B100F3 },
         { .reg = 0x00009888, .val = 0x0EB100F2 },
         { .reg = 0x00009888, .val = 0x06B10104 },
         { .reg = 0x00009888, .val = 0x08B10105 },
         { .reg = 0x00009888, .val = 0x0AB10106 },
         { .reg = 0x00009888, .val = 0x0CB10107 },
         { .reg = 0x00009888, .val = 0x10B10000 },
         { .reg = 0x00009888, .val = 0x02B10000 },
         { .reg = 0x00009888, .val = 0x04B10000 },
         { .reg = 0x00009888, .val = 0x00B28000 },
         { .reg = 0x00009888, .val = 0x0EB28000 },
         { .reg = 0x00009888, .val = 0x06B28000 },
         { .reg = 0x00009888, .val = 0x08B28000 },
         { .reg = 0x00009888, .val = 0x0AB28000 },
         { .reg = 0x00009888, .val = 0x0CB28000 },
         { .reg = 0x00009888, .val = 0x10B38000 },
         { .reg = 0x00009888, .val = 0x1CB30300 },
         { .reg = 0x00009888, .val = 0x16B38000 },
         { .reg = 0x00009888, .val = 0x18B38000 },
         { .reg = 0x00009888, .val = 0x1AB38000 },
         { .reg = 0x00009888, .val = 0x1E0F0028 },
         { .reg = 0x00009888, .val = 0x14104000 },
         { .reg = 0x00009888, .val = 0x16104000 },
         { .reg = 0x00009888, .val = 0x2C024000 },
         { .reg = 0x00009888, .val = 0x2E020001 },
         { .reg = 0x00009888, .val = 0x2A03A500 },
         { .reg = 0x00009888, .val = 0x18002000 },
         { .reg = 0x00009888, .val = 0x36003249 },
         { .reg = 0x00009888, .val = 0x380024DB },
         { .reg = 0x00009888, .val = 0x1A004000 },
         { .reg = 0x00009888, .val = 0x1C004000 },
         { .reg = 0x00009888, .val = 0x1E002000 },
         { .reg = 0x00009888, .val = 0x34000900 },
         { .reg = 0x00009888, .val = 0x00014000 },
         { .reg = 0x00009888, .val = 0x1A012000 },
         { .reg = 0x00009888, .val = 0x1C012000 },
         { .reg = 0x00009888, .val = 0x1E012000 },
         { .reg = 0x00009888, .val = 0x06014000 },
         { .reg = 0x00009888, .val = 0x08014000 },
         { .reg = 0x00009888, .val = 0x0A014000 },
         { .reg = 0x00009888, .val = 0x18012000 },
         { .reg = 0x00009888, .val = 0x2405C000 },
         { .reg = 0x00009888, .val = 0x26050003 },
         { .reg = 0x00009888, .val = 0x220500A0 },
         { .reg = 0x00009888, .val = 0x24062800 },
         { .reg = 0x00009888, .val = 0x140A8000 },
         { .reg = 0x00009888, .val = 0x160A8000 },
         { .reg = 0x00009888, .val = 0x180A8000 },
         { .reg = 0x00009888, .val = 0x1A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x41104000 },
         { .reg = 0x00009888, .val = 0x5B105555 },
         { .reg = 0x00009888, .val = 0x5D101555 },
         { .reg = 0x00009888, .val = 0x17144000 },
         { .reg = 0x00009888, .val = 0x1D14AAAA },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B14AA00 },
         { .reg = 0x00009888, .val = 0x01124000 },
         { .reg = 0x00009888, .val = 0x0F124000 },
         { .reg = 0x00009888, .val = 0x11124000 },
         { .reg = 0x00009888, .val = 0x13124000 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x47100600 },
         { .reg = 0x00009888, .val = 0x4D100606 },
         { .reg = 0x00009888, .val = 0x4F100606 },
         { .reg = 0x00009888, .val = 0x51100006 },
         { .reg = 0x00009888, .val = 0x53100400 },
         { .reg = 0x00009888, .val = 0x55100804 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100008 },
         { .reg = 0x00009888, .val = 0x49100808 },
         { .reg = 0x00009888, .val = 0x4B100606 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x70800000 },
         { .reg = 0x0000DC40, .val = 0x007F0000 },
         { .reg = 0x0000D940, .val = 0x00000000 },
         { .reg = 0x0000D944, .val = 0x0000FFFC },
         { .reg = 0x0000DC00, .val = 0x00000000 },
         { .reg = 0x0000DC04, .val = 0x0000FFFC },
         { .reg = 0x0000D948, .val = 0x00000000 },
         { .reg = 0x0000D94C, .val = 0x0000FFF3 },
         { .reg = 0x0000DC08, .val = 0x00000000 },
         { .reg = 0x0000DC0C, .val = 0x0000FFF3 },
         { .reg = 0x0000D950, .val = 0x00000000 },
         { .reg = 0x0000D954, .val = 0x0000FFCF },
         { .reg = 0x0000DC10, .val = 0x00000000 },
         { .reg = 0x0000DC14, .val = 0x0000FFCF },
         { .reg = 0x0000D958, .val = 0x00000000 },
         { .reg = 0x0000D95C, .val = 0x0000FF3F },
         { .reg = 0x0000DC18, .val = 0x00000000 },
         { .reg = 0x0000DC1C, .val = 0x0000FF3F },
         { .reg = 0x0000D960, .val = 0x00000000 },
         { .reg = 0x0000D964, .val = 0x0000FCFF },
         { .reg = 0x0000DC20, .val = 0x00000000 },
         { .reg = 0x0000DC24, .val = 0x0000FCFF },
         { .reg = 0x0000D968, .val = 0x00000000 },
         { .reg = 0x0000D96C, .val = 0x0000F3FF },
         { .reg = 0x0000DC28, .val = 0x00000000 },
         { .reg = 0x0000DC2C, .val = 0x0000F3FF },
         { .reg = 0x0000D970, .val = 0x00078000 },
         { .reg = 0x0000D974, .val = 0x00000FFF },
         { .reg = 0x0000DC30, .val = 0x00078000 },
         { .reg = 0x0000DC34, .val = 0x00000FFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__tdl_2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_2__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_2__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_2__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 1) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread00_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice0 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice0 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread00ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 2) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread01_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice1 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice1 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread01ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread02_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread02ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread03_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread03ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread04_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread04ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__ps_thread05_ready_for_dispatch__read;
         counter->name = "PS Thread Ready For Dispatch on Slice0 Dualsubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which PS thread is ready for dispatch on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "PSThread05ReadyForDispatch";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__thread_header05_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader05Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__thread_header05_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__thread_header05_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__thread_header05_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 32) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_2__thread_header05_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice5 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice5 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader05ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_tdl_3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "TDL_3";
   query->symbol_name = "TDL_3";
   query->guid = "0348b595-94ef-445f-b4e5-11d0076b6307";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 28);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x2651001C },
         { .reg = 0x00009888, .val = 0x2671001C },
         { .reg = 0x00009888, .val = 0x2691001C },
         { .reg = 0x00009888, .val = 0x24072A80 },
         { .reg = 0x00009888, .val = 0x06510107 },
         { .reg = 0x00009888, .val = 0x08510106 },
         { .reg = 0x00009888, .val = 0x0A510105 },
         { .reg = 0x00009888, .val = 0x0C510104 },
         { .reg = 0x00009888, .val = 0x10510000 },
         { .reg = 0x00009888, .val = 0x02510000 },
         { .reg = 0x00009888, .val = 0x04510000 },
         { .reg = 0x00009888, .val = 0x06528000 },
         { .reg = 0x00009888, .val = 0x08528000 },
         { .reg = 0x00009888, .val = 0x0A528000 },
         { .reg = 0x00009888, .val = 0x0C528000 },
         { .reg = 0x00009888, .val = 0x16538000 },
         { .reg = 0x00009888, .val = 0x18538000 },
         { .reg = 0x00009888, .val = 0x1A538000 },
         { .reg = 0x00009888, .val = 0x1C530100 },
         { .reg = 0x00009888, .val = 0x1C710107 },
         { .reg = 0x00009888, .val = 0x1E710106 },
         { .reg = 0x00009888, .val = 0x02710105 },
         { .reg = 0x00009888, .val = 0x04710104 },
         { .reg = 0x00009888, .val = 0x10710000 },
         { .reg = 0x00009888, .val = 0x0E710000 },
         { .reg = 0x00009888, .val = 0x00710000 },
         { .reg = 0x00009888, .val = 0x1C728000 },
         { .reg = 0x00009888, .val = 0x1E728000 },
         { .reg = 0x00009888, .val = 0x02728000 },
         { .reg = 0x00009888, .val = 0x04728000 },
         { .reg = 0x00009888, .val = 0x1E730003 },
         { .reg = 0x00009888, .val = 0x12738000 },
         { .reg = 0x00009888, .val = 0x14738000 },
         { .reg = 0x00009888, .val = 0x14910107 },
         { .reg = 0x00009888, .val = 0x16910106 },
         { .reg = 0x00009888, .val = 0x18910105 },
         { .reg = 0x00009888, .val = 0x1A910104 },
         { .reg = 0x00009888, .val = 0x10910000 },
         { .reg = 0x00009888, .val = 0x0A910000 },
         { .reg = 0x00009888, .val = 0x0C910000 },
         { .reg = 0x00009888, .val = 0x14928000 },
         { .reg = 0x00009888, .val = 0x16928000 },
         { .reg = 0x00009888, .val = 0x18928000 },
         { .reg = 0x00009888, .val = 0x1A928000 },
         { .reg = 0x00009888, .val = 0x1C93F000 },
         { .reg = 0x00009888, .val = 0x1E0F2800 },
         { .reg = 0x00009888, .val = 0x1C0F000A },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x02104000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x02024000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x02034000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x06038000 },
         { .reg = 0x00009888, .val = 0x08038000 },
         { .reg = 0x00009888, .val = 0x0A038000 },
         { .reg = 0x00009888, .val = 0x18034000 },
         { .reg = 0x00009888, .val = 0x36001003 },
         { .reg = 0x00009888, .val = 0x38003649 },
         { .reg = 0x00009888, .val = 0x1A006000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x1E006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x2A015500 },
         { .reg = 0x00009888, .val = 0x2206AA00 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009888, .val = 0x020A8000 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x060A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009888, .val = 0x0C0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x5D101554 },
         { .reg = 0x00009888, .val = 0x5B100555 },
         { .reg = 0x00009888, .val = 0x1D14A80A },
         { .reg = 0x00009888, .val = 0x1F14002A },
         { .reg = 0x00009888, .val = 0x1B14AA00 },
         { .reg = 0x00009888, .val = 0x15124000 },
         { .reg = 0x00009888, .val = 0x17124000 },
         { .reg = 0x00009888, .val = 0x19124000 },
         { .reg = 0x00009888, .val = 0x1B124000 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x03124000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x07124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D124000 },
         { .reg = 0x00009888, .val = 0x51100600 },
         { .reg = 0x00009888, .val = 0x53100606 },
         { .reg = 0x00009888, .val = 0x55100006 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49100000 },
         { .reg = 0x00009888, .val = 0x4B100404 },
         { .reg = 0x00009888, .val = 0x4D100404 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x70800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00070000 },
         { .reg = 0x0000D940, .val = 0x00078000 },
         { .reg = 0x0000D944, .val = 0x00000FFF },
         { .reg = 0x0000DC00, .val = 0x00078000 },
         { .reg = 0x0000DC04, .val = 0x00000FFF },
         { .reg = 0x0000D948, .val = 0x00007800 },
         { .reg = 0x0000D94C, .val = 0x0000F0FF },
         { .reg = 0x0000DC08, .val = 0x00007800 },
         { .reg = 0x0000DC0C, .val = 0x0000F0FF },
         { .reg = 0x0000D950, .val = 0x00000780 },
         { .reg = 0x0000D954, .val = 0x0000FF0F },
         { .reg = 0x0000DC10, .val = 0x00000780 },
         { .reg = 0x0000DC14, .val = 0x0000FF0F },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__tdl_3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__tdl_3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_3__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_3__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__tdl_3__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header02_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader02Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 92;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header03_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader03Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 96;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header04_ready__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher Unit: percent.";
         counter->symbol_name = "ThreadHeader04Ready";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 100;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header02_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 104;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header02_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 108;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header02_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 112;
      }

      if (perf->sys_vars.subslice_mask & 4) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header02_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice2 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice2 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader02ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 116;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header03_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 120;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header03_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 124;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header03_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 128;
      }

      if (perf->sys_vars.subslice_mask & 8) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header03_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice3 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice3 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader03ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 132;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header04_ready_port0__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 0";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 0 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort0";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 136;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header04_ready_port1__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 1";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 1 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort1";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 140;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header04_ready_port2__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 2";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 2 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort2";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 144;
      }

      if (perf->sys_vars.subslice_mask & 16) {
         counter = &query->counters[query->n_counters++];
         counter->oa_counter_read_float = adl__tdl_3__thread_header04_ready_port3__read;
         counter->name = "Thread Header Ready on Slice0 DualSubslice4 Thread Dispatcher Port 3";
         counter->desc = "The percentage of time in which thread header is ready on slice0 dualsubslice4 thread dispatcher port 3 Unit: percent.";
         counter->symbol_name = "ThreadHeader04ReadyPort3";
         counter->category = "GPU/Thread Dispatcher";
         counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
         counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
         counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
         counter->raw_max = 100.0;
         counter->offset = 148;
      }

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_gpu_busyness_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "GpuBusyness";
   query->symbol_name = "GpuBusyness";
   query->guid = "67577936-ac11-46a1-b07d-44ea1318df5b";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 22);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x0E101200 },
         { .reg = 0x00009888, .val = 0x040E0043 },
         { .reg = 0x00009888, .val = 0x0A0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x0C0E0000 },
         { .reg = 0x00009888, .val = 0x1C0F0104 },
         { .reg = 0x00009888, .val = 0x08100053 },
         { .reg = 0x00009888, .val = 0x20100000 },
         { .reg = 0x00009888, .val = 0x04104000 },
         { .reg = 0x00009888, .val = 0x0A104000 },
         { .reg = 0x00009888, .val = 0x04024000 },
         { .reg = 0x00009888, .val = 0x08024000 },
         { .reg = 0x00009888, .val = 0x0A024000 },
         { .reg = 0x00009888, .val = 0x04034000 },
         { .reg = 0x00009888, .val = 0x08034000 },
         { .reg = 0x00009888, .val = 0x0A034000 },
         { .reg = 0x00009888, .val = 0x1C006000 },
         { .reg = 0x00009888, .val = 0x34001B00 },
         { .reg = 0x00009888, .val = 0x040A8000 },
         { .reg = 0x00009888, .val = 0x080A8000 },
         { .reg = 0x00009888, .val = 0x0A0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x15102400 },
         { .reg = 0x00009888, .val = 0x23010120 },
         { .reg = 0x00009888, .val = 0x15182400 },
         { .reg = 0x00009888, .val = 0x23210120 },
         { .reg = 0x00009888, .val = 0x17100023 },
         { .reg = 0x00009888, .val = 0x11100000 },
         { .reg = 0x00009888, .val = 0x5D101000 },
         { .reg = 0x00009888, .val = 0x5B100545 },
         { .reg = 0x00009888, .val = 0x1B148800 },
         { .reg = 0x00009888, .val = 0x1D140002 },
         { .reg = 0x00009888, .val = 0x61112000 },
         { .reg = 0x00009888, .val = 0x5F110401 },
         { .reg = 0x00009888, .val = 0x1F128000 },
         { .reg = 0x00009888, .val = 0x03128000 },
         { .reg = 0x00009888, .val = 0x05124000 },
         { .reg = 0x00009888, .val = 0x09124000 },
         { .reg = 0x00009888, .val = 0x0B124000 },
         { .reg = 0x00009888, .val = 0x0D128000 },
         { .reg = 0x00009888, .val = 0x01008000 },
         { .reg = 0x00009888, .val = 0x13028000 },
         { .reg = 0x00009888, .val = 0x030100D3 },
         { .reg = 0x00009888, .val = 0x21010000 },
         { .reg = 0x00009888, .val = 0x071800A3 },
         { .reg = 0x00009888, .val = 0x11180000 },
         { .reg = 0x00009888, .val = 0x21180400 },
         { .reg = 0x00009888, .val = 0x23180000 },
         { .reg = 0x00009888, .val = 0x0F208000 },
         { .reg = 0x00009888, .val = 0x21222000 },
         { .reg = 0x00009888, .val = 0x1F2100D3 },
         { .reg = 0x00009888, .val = 0x21210000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47100000 },
         { .reg = 0x00009888, .val = 0x49101007 },
         { .reg = 0x00009888, .val = 0x4B10040A },
         { .reg = 0x00009888, .val = 0x4D100210 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x30800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00030000 },
         { .reg = 0x0000D940, .val = 0x00024002 },
         { .reg = 0x0000D944, .val = 0x0000B7FF },
         { .reg = 0x0000DC00, .val = 0x00024002 },
         { .reg = 0x0000DC04, .val = 0x0000B7FF },
         { .reg = 0x0000D948, .val = 0x0007F000 },
         { .reg = 0x0000D94C, .val = 0x000001FF },
         { .reg = 0x0000DC08, .val = 0x0007F000 },
         { .reg = 0x0000DC0C, .val = 0x000001FF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00804704 },
         { .reg = 0x0000E558, .val = 0x00A04904 },
         { .reg = 0x0000E658, .val = 0x00805705 },
         { .reg = 0x0000E758, .val = 0x00A05905 },
         { .reg = 0x0000E45C, .val = 0x00808708 },
         { .reg = 0x0000E55C, .val = 0x00A08908 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__gpu_busyness__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__eu_active__read;
      counter->name = "EU Active";
      counter->desc = "The percentage of time in which the Execution Units were actively processing. Unit: percent.";
      counter->symbol_name = "EuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 28;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__eu_stall__read;
      counter->name = "EU Stall";
      counter->desc = "The percentage of time in which the Execution Units were stalled. Unit: percent.";
      counter->symbol_name = "EuStall";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__eu_thread_occupancy__read;
      counter->name = "EU Thread Occupancy";
      counter->desc = "The percentage of time in which hardware threads occupied EUs. Unit: percent.";
      counter->symbol_name = "EuThreadOccupancy";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 36;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__gpu_busyness__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__posh_engine_busy__read;
      counter->name = "Posh Ring Busy";
      counter->desc = "The percentage of time when posh command streamer was busy. Unit: percent.";
      counter->symbol_name = "PoshEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__blitter_busy__read;
      counter->name = "Blitter Ring Busy";
      counter->desc = "The percentage of time when blitter command streamer was busy. Unit: percent.";
      counter->symbol_name = "BlitterBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__vebox_busy__read;
      counter->name = "Vebox Ring Busy";
      counter->desc = "The percentage of time when vebox command streamer was busy. Unit: percent.";
      counter->symbol_name = "VeboxBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__vdbox0_busy__read;
      counter->name = "Vdbox0 Ring Busy";
      counter->desc = "The percentage of time when Vdbox0 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox0Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 108;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__vdbox1_busy__read;
      counter->name = "Vdbox1 Ring Busy";
      counter->desc = "The percentage of time when Vdbox1 command streamer was busy. Unit: percent.";
      counter->symbol_name = "Vdbox1Busy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__gpu_busyness__any_engine_busy__read;
      counter->name = "Any Engine Busy";
      counter->desc = "The percentage of time when any command streamer was busy. Unit: percent.";
      counter->symbol_name = "AnyEngineBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity1_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity1";
   query->symbol_name = "EuActivity1";
   query->guid = "b1c1cb56-f82f-444e-b408-aebbe4777f40";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00810710 },
         { .reg = 0x0000E558, .val = 0x00A10910 },
         { .reg = 0x0000E658, .val = 0x00850750 },
         { .reg = 0x0000E758, .val = 0x00A50950 },
         { .reg = 0x0000E45C, .val = 0x00802702 },
         { .reg = 0x0000E55C, .val = 0x00A02902 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity1__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity1__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__vs_fpu_active__read;
      counter->name = "VS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsFpuActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__ps_fpu_active__read;
      counter->name = "PS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsFpuActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity1__eu_send_active__read;
      counter->name = "EU Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EuSendActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity2_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity2";
   query->symbol_name = "EuActivity2";
   query->guid = "b45c7714-75be-4639-83b8-258392695044";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00862762 },
         { .reg = 0x0000E558, .val = 0x00A62962 },
         { .reg = 0x0000E658, .val = 0x00860760 },
         { .reg = 0x0000E758, .val = 0x00A60960 },
         { .reg = 0x0000E45C, .val = 0x00861761 },
         { .reg = 0x0000E55C, .val = 0x00A61961 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity2__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity2__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__cs_em_active__read;
      counter->name = "CS EM Pipe Active";
      counter->desc = "The percentage of time in which EU FPU1 pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsEmActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__cs_fpu_active__read;
      counter->name = "CS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsFpuActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity2__cs_send_active__read;
      counter->name = "CS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a compute shader instruction. Unit: percent.";
      counter->symbol_name = "CsSendActive";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity3_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity3";
   query->symbol_name = "EuActivity3";
   query->guid = "e63fc92b-0c14-48b8-b832-52631f75b1c6";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00811711 },
         { .reg = 0x0000E558, .val = 0x00A11911 },
         { .reg = 0x0000E658, .val = 0x00851751 },
         { .reg = 0x0000E758, .val = 0x00A51951 },
         { .reg = 0x0000E45C, .val = 0x00852752 },
         { .reg = 0x0000E55C, .val = 0x00A52952 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity3__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity3__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__vs_em_active__read;
      counter->name = "VS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsEmActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__ps_em_active__read;
      counter->name = "PS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsEmActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity3__ps_send_active__read;
      counter->name = "PS Send Pipeline Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a pixel shader instruction. Unit: percent.";
      counter->symbol_name = "PsSendActive";
      counter->category = "EU Array/Pixel Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/**
 * Build the "EuActivity4" OA metric set description and register it in
 * perf->oa_metrics_table, keyed by the set's GUID string.
 *
 * The function allocates a new intel_perf_query_info with \p perf as its
 * ralloc parent, fills in the OA report format, the accumulator offsets,
 * three register programming tables (mux, B-counter, flex) and 18 counter
 * descriptors, computes query->data_size from the last counter, and finally
 * inserts the query into the hash table.
 *
 * NOTE(review): neither the rzalloc() nor the rzalloc_array() result is
 * checked for NULL before being dereferenced.
 *
 * \param perf  perf configuration that becomes the ralloc parent of the new
 *              query and whose oa_metrics_table receives it.
 */
static void
adl_register_eu_activity4_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity4";
   query->symbol_name = "EuActivity4";
   query->guid = "a5bd7cc2-3b17-4287-bdb2-a6814998daa1";
   /* Room for exactly the 18 counters appended below; keep the two in sync. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   /* Each offset is derived from the previous one, giving:
    * gpu_time = 0, gpu_clock = 1, a = 2, b = 38, c = 46, perfcnt = 54,
    * rpstat = 56.  Presumably these index results->accumulator[] (the read
    * helpers at the top of the file index it with gpu_time_offset).
    */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Initial value is never read: counter is reassigned before every use. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   /* NOTE(review): the comment above talks about a global variable, but
    * query is freshly allocated in this function.  Assuming rzalloc()
    * zero-fills the allocation (confirm against util/ralloc.h), data_size
    * is 0 here and this branch is always taken.
    */
   if (!query->data_size) {
      /* Register/value pairs stored as the query's mux configuration.
       * NOTE(review): many entries target the same register (0x00009888)
       * with different values, so the table is presumably applied in order;
       * do not sort or de-duplicate it.
       */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* Register/value pairs stored as the query's B-counter configuration. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Register/value pairs stored as the query's flex configuration. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00820720 },
         { .reg = 0x0000E558, .val = 0x00A20920 },
         { .reg = 0x0000E658, .val = 0x00830730 },
         { .reg = 0x0000E758, .val = 0x00A30930 },
         { .reg = 0x0000E45C, .val = 0x00812712 },
         { .reg = 0x0000E55C, .val = 0x00A12912 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptors.  Each stanza claims the next free slot via
       * query->n_counters++ and fills it in.  UINT64 counters are spaced 8
       * apart and FLOAT counters 4 apart in counter->offset; offset looks
       * like a byte position in the query's result data (data_size below is
       * computed from it) -- TODO confirm against intel_perf.h.
       */

      /* Counter 0: GpuTime (uint64, offset 0). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      /* Counter 1: GpuCoreClocks (uint64, offset 8). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* Counter 2: AvgGpuCoreFrequency (uint64, offset 16).  The only
       * counter here whose raw_max is computed by a helper taking perf. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity4__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Counter 3: GpuBusy (float, offset 24).  The next counter starts at
       * 32, so 4 units after this float are left unused. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      /* Counter 4: VsThreads (uint64, offset 32). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      /* Counter 5: HsThreads (uint64, offset 40). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      /* Counter 6: DsThreads (uint64, offset 48). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      /* Counter 7: GsThreads (uint64, offset 56). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      /* Counter 8: PsThreads (uint64, offset 64).  The symbol says "Ps"
       * while the display name and category say "FS"/"Fragment Shader". */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      /* Counter 9: CsThreads (uint64, offset 72). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      /* Counter 10: RenderBusy (float, offset 80). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      /* Counter 11: ComputeBusy (float, offset 84).
       * NOTE(review): desc is word-for-word the RenderBusy text ("render
       * command streamer"), which looks like a copy/paste slip for a counter
       * named "Compute Ring Busy".  This file is generated by gen_perf.py,
       * so confirm and correct it in the generator's input, not here.
       */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      /* Counter 12: RenderAndComputeBusy (float, offset 88).  The next
       * counter starts at 96, so 4 units after this float are left unused. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      /* Counter 13: GtiReadThroughput (uint64, offset 96). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      /* Counter 14: GtiWriteThroughput (uint64, offset 104). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity4__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      /* Counter 15: HsFpuActive (float, offset 112).  First of the three
       * counters that differ from the EuActivity3 set registered above. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__hs_fpu_active__read;
      counter->name = "HS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsFpuActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      /* Counter 16: DsFpuActive (float, offset 116). */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__ds_fpu_active__read;
      counter->name = "DS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsFpuActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      /* Counter 17: VsSendActive (float, offset 120).  This must stay the
       * last stanza: data_size below is derived from it. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity4__vs_send_active__read;
      counter->name = "VS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a vertex shader instruction. Unit: percent.";
      counter->symbol_name = "VsSendActive";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      /* counter still points at the last descriptor appended above, so the
       * total is that counter's offset plus its size. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the finished query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity5_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity5";
   query->symbol_name = "EuActivity5";
   query->guid = "036c9033-142d-438f-a5ec-ea0ec62a6f43";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00821721 },
         { .reg = 0x0000E558, .val = 0x00A21921 },
         { .reg = 0x0000E658, .val = 0x00831731 },
         { .reg = 0x0000E758, .val = 0x00A31931 },
         { .reg = 0x0000E45C, .val = 0x00822722 },
         { .reg = 0x0000E55C, .val = 0x00A22922 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity5__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity5__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__hs_em_active__read;
      counter->name = "HS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a hull shader instructions. Unit: percent.";
      counter->symbol_name = "HsEmActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__ds_em_active__read;
      counter->name = "DS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a domain shader instructions. Unit: percent.";
      counter->symbol_name = "DsEmActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity5__hs_send_active__read;
      counter->name = "HS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a hull shader instruction. Unit: percent.";
      counter->symbol_name = "HsSendActive";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity6_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity6";
   query->symbol_name = "EuActivity6";
   query->guid = "fbba3669-b6db-4aa8-a993-b2e51998a8dd";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 18);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00840740 },
         { .reg = 0x0000E558, .val = 0x00A40940 },
         { .reg = 0x0000E658, .val = 0x00841741 },
         { .reg = 0x0000E758, .val = 0x00A41941 },
         { .reg = 0x0000E45C, .val = 0x00842742 },
         { .reg = 0x0000E55C, .val = 0x00A42942 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity6__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity6__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__gs_fpu_active__read;
      counter->name = "GS FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsFpuActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__gs_em_active__read;
      counter->name = "GS EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing a geometry shader instructions. Unit: percent.";
      counter->symbol_name = "GsEmActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 116;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity6__gs_send_active__read;
      counter->name = "GS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a geometry shader instruction. Unit: percent.";
      counter->symbol_name = "GsSendActive";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity7_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity7";
   query->symbol_name = "EuActivity7";
   query->guid = "c3ea2a23-f1c7-4a19-9da2-b569226fb6de";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 19);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00803703 },
         { .reg = 0x0000E558, .val = 0x00A03903 },
         { .reg = 0x0000E658, .val = 0x00800700 },
         { .reg = 0x0000E758, .val = 0x00A00900 },
         { .reg = 0x0000E45C, .val = 0x00801701 },
         { .reg = 0x0000E55C, .val = 0x00A01901 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity7__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__fpu_active__read;
      counter->name = "EU FPU Pipe Active";
      counter->desc = "The percentage of time in which EU FPU pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "FpuActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__em_active__read;
      counter->name = "EM Pipe Active";
      counter->desc = "The percentage of time in which EU EM pipeline was actively processing. Unit: percent.";
      counter->symbol_name = "EmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__eu_fpu_em_active__read;
      counter->name = "EU FPU And EM Pipes Active";
      counter->desc = "The percentage of time in which EU FPU and EM pipelines were actively processing. Unit: percent.";
      counter->symbol_name = "EuFpuEmActive";
      counter->category = "EU Array/Pipes";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__eu_avg_ipc_rate__read;
      counter->name = "EU AVG IPC Rate";
      counter->desc = "The average rate of IPC calculated for 2 FPU pipelines.";
      counter->symbol_name = "EuAvgIpcRate";
      counter->category = "EU Array";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_NUMBER;
      counter->raw_max = 2.0;
      counter->offset = 92;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 100;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity7__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 112;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity7__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 120;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


static void
adl_register_eu_activity8_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "EuActivity8";
   query->symbol_name = "EuActivity8";
   query->guid = "a5e2f79b-cecb-4eff-8f29-cda8e2a58749";
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 16);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets... */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once... */

   if (!query->data_size) {
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x0E0E1200 },
         { .reg = 0x00009888, .val = 0x220E0009 },
         { .reg = 0x00009888, .val = 0x1C0E0043 },
         { .reg = 0x00009888, .val = 0x1E0E00B3 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1400 },
         { .reg = 0x00009888, .val = 0x1C104000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020140 },
         { .reg = 0x00009888, .val = 0x2C030005 },
         { .reg = 0x00009888, .val = 0x38003600 },
         { .reg = 0x00009888, .val = 0x1C0A8000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x05151D37 },
         { .reg = 0x00009888, .val = 0x09151547 },
         { .reg = 0x00009888, .val = 0x05351C00 },
         { .reg = 0x00009888, .val = 0x09351400 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x5B100BBB },
         { .reg = 0x00009888, .val = 0x1F140028 },
         { .reg = 0x00009888, .val = 0x1D124000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x0D150136 },
         { .reg = 0x00009888, .val = 0x01150000 },
         { .reg = 0x00009888, .val = 0x03164000 },
         { .reg = 0x00009888, .val = 0x05164000 },
         { .reg = 0x00009888, .val = 0x07164000 },
         { .reg = 0x00009888, .val = 0x03350137 },
         { .reg = 0x00009888, .val = 0x07350147 },
         { .reg = 0x00009888, .val = 0x0B350136 },
         { .reg = 0x00009888, .val = 0x01350000 },
         { .reg = 0x00009888, .val = 0x01368000 },
         { .reg = 0x00009888, .val = 0x03368000 },
         { .reg = 0x00009888, .val = 0x05368000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100000 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x47101000 },
         { .reg = 0x00009888, .val = 0x49101616 },
         { .reg = 0x00009888, .val = 0x4B101616 },
         { .reg = 0x00009888, .val = 0x4D100616 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0x10800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0x00800000 },
         { .reg = 0x0000DC40, .val = 0x00010000 },
         { .reg = 0x0000D940, .val = 0x00001802 },
         { .reg = 0x0000D944, .val = 0x0000FCFF },
         { .reg = 0x0000DC00, .val = 0x00001802 },
         { .reg = 0x0000DC04, .val = 0x0000FCFF },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E458, .val = 0x00832732 },
         { .reg = 0x0000E558, .val = 0x00A32932 },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__eu_activity8__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity8__gpu_busy__read;
      counter->name = "GPU Busy";
      counter->desc = "The percentage of time in which the GPU has been processing GPU commands. Unit: percent.";
      counter->symbol_name = "GpuBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__vs_threads__read;
      counter->name = "VS Threads Dispatched";
      counter->desc = "The total number of vertex shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "VsThreads";
      counter->category = "EU Array/Vertex Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__hs_threads__read;
      counter->name = "HS Threads Dispatched";
      counter->desc = "The total number of hull shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "HsThreads";
      counter->category = "EU Array/Hull Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__ds_threads__read;
      counter->name = "DS Threads Dispatched";
      counter->desc = "The total number of domain shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "DsThreads";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__gs_threads__read;
      counter->name = "GS Threads Dispatched";
      counter->desc = "The total number of geometry shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "GsThreads";
      counter->category = "EU Array/Geometry Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__ps_threads__read;
      counter->name = "FS Threads Dispatched";
      counter->desc = "The total number of fragment shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "PsThreads";
      counter->category = "EU Array/Fragment Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__cs_threads__read;
      counter->name = "CS Threads Dispatched";
      counter->desc = "The total number of compute shader hardware threads dispatched. Unit: threads.";
      counter->symbol_name = "CsThreads";
      counter->category = "EU Array/Compute Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_THREADS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity8__render_busy__read;
      counter->name = "Render Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "RenderBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity8__compute_busy__read;
      counter->name = "Compute Ring Busy";
      counter->desc = "The percentage of time when render command streamer was busy. Unit: percent.";
      counter->symbol_name = "ComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 84;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity8__render_and_compute_busy__read;
      counter->name = "Render and compute engines are simultaneously busy";
      counter->desc = "The percentage of time when render and compute engines are simultaneously busy Unit: percent.";
      counter->symbol_name = "RenderAndComputeBusy";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__gti_read_throughput__read;
      counter->name = "GTI Read Throughput";
      counter->desc = "The total number of GPU memory bytes read from GTI. Unit: bytes.";
      counter->symbol_name = "GtiReadThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 96;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__eu_activity8__gti_write_throughput__read;
      counter->name = "GTI Write Throughput";
      counter->desc = "The total number of GPU memory bytes written to GTI. Unit: bytes.";
      counter->symbol_name = "GtiWriteThroughput";
      counter->category = "GTI";
      counter->type = INTEL_PERF_COUNTER_TYPE_THROUGHPUT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_BYTES;
      counter->raw_max = 0 /* unsupported (varies over time) */;
      counter->offset = 104;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_float = adl__eu_activity8__ds_send_active__read;
      counter->name = "DS Send Pipe Active";
      counter->desc = "The percentage of time in which EU send pipeline was actively processing a domain shader instruction. Unit: percent.";
      counter->symbol_name = "DsSendActive";
      counter->category = "EU Array/Domain Shader";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_FLOAT;
      counter->units = INTEL_PERF_COUNTER_UNITS_PERCENT;
      counter->raw_max = 100.0;
      counter->offset = 112;

      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}


/* Builds the description of the "TestOa" OA metric set for ADL and inserts
 * it into perf->oa_metrics_table, keyed by the query's GUID string.
 *
 * The function fills in, in this order:
 *   - the query identity (name, symbol name, GUID, OA report format),
 *   - the accumulation buffer offsets,
 *   - the MUX / B-counter / flex register programming tables,
 *   - the 13 counters exposed by the set and the resulting data size.
 *
 * \param perf  perf configuration that owns the query; it is used as the
 *              first argument of rzalloc() and its oa_metrics_table
 *              receives the new query.
 *
 * NOTE(review): the results of rzalloc()/rzalloc_array() are used without a
 * NULL check here.
 */
static void
adl_register_test_oa_counter_query(struct intel_perf_config *perf)
{
   struct intel_perf_query_info *query = rzalloc(perf, struct intel_perf_query_info);

   query->perf = perf;
   query->kind = INTEL_PERF_QUERY_TYPE_OA;
   query->name = "Metric set TestOa";
   query->symbol_name = "TestOa";
   query->guid = "89173c19-fcfe-48da-ac26-fb64425f141b";
   /* Room for exactly the 13 counters appended further down. */
   query->counters = rzalloc_array(query, struct intel_perf_query_counter, 13);
   query->n_counters = 0;
   query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
   query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
   /* Accumulation buffer offsets: each section starts right after the
    * previous one. The section lengths used below are 1 (GPU time),
    * 1 (GPU clock), 36 (a_offset section), 8 (b_offset section),
    * 8 (c_offset section) and 2 (perfcnt section). */
   query->gpu_time_offset = 0;
   query->gpu_clock_offset = query->gpu_time_offset + 1;
   query->a_offset = query->gpu_clock_offset + 1;
   query->b_offset = query->a_offset + 36;
   query->c_offset = query->b_offset + 8;
   query->perfcnt_offset = query->c_offset + 8;
   query->rpstat_offset = query->perfcnt_offset + 2;

   /* Cursor into query->counters; it is re-pointed at the next free slot
    * before every counter is filled in. */
   struct intel_perf_query_counter *counter = query->counters;

   /* Note: we're assuming there can't be any variation in the definition
    * of a query between contexts so it's ok to describe a query within a
    * global variable which only needs to be initialized once...
    *
    * NOTE(review): in this function the query is freshly allocated with
    * rzalloc() rather than stored in a global, so data_size is presumably
    * still zero at this point and the guard below is expected to always be
    * taken - confirm against rzalloc()'s zero-fill behaviour. */

   if (!query->data_size) {
      /* MUX register programming for this metric set, as (register, value)
       * pairs. The table is static, so query->config only stores a pointer
       * to it plus its length. */
      static const struct intel_perf_query_register_prog mux_regs[] = {
         { .reg = 0x00000D04, .val = 0x00000200 },
         { .reg = 0x00009840, .val = 0x00000000 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x280E0000 },
         { .reg = 0x00009888, .val = 0x1E0E0147 },
         { .reg = 0x00009888, .val = 0x180E0000 },
         { .reg = 0x00009888, .val = 0x160E0000 },
         { .reg = 0x00009888, .val = 0x1E0F1000 },
         { .reg = 0x00009888, .val = 0x1E104000 },
         { .reg = 0x00009888, .val = 0x2E020100 },
         { .reg = 0x00009888, .val = 0x2C030004 },
         { .reg = 0x00009888, .val = 0x38003000 },
         { .reg = 0x00009888, .val = 0x1E0A8000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x49110000 },
         { .reg = 0x00009888, .val = 0x5D101400 },
         { .reg = 0x00009888, .val = 0x1F140020 },
         { .reg = 0x00009888, .val = 0x1D1103B3 },
         { .reg = 0x00009888, .val = 0x01110000 },
         { .reg = 0x00009888, .val = 0x61110000 },
         { .reg = 0x00009888, .val = 0x1D128000 },
         { .reg = 0x00009888, .val = 0x1F124000 },
         { .reg = 0x00009888, .val = 0x17100000 },
         { .reg = 0x00009888, .val = 0x55100510 },
         { .reg = 0x00009888, .val = 0x57100000 },
         { .reg = 0x00009888, .val = 0x31100000 },
         { .reg = 0x00009884, .val = 0x00000003 },
         { .reg = 0x00009888, .val = 0x65100002 },
         { .reg = 0x00009884, .val = 0x00000000 },
         { .reg = 0x00009888, .val = 0x42000001 },
      };
      query->config.mux_regs = mux_regs;
      query->config.n_mux_regs = ARRAY_SIZE(mux_regs);

      /* B-counter register programming, same (register, value) layout. */
      static const struct intel_perf_query_register_prog b_counter_regs[] = {
         { .reg = 0x0000D920, .val = 0x00000000 },
         { .reg = 0x0000D900, .val = 0x00000000 },
         { .reg = 0x0000D904, .val = 0xF0800000 },
         { .reg = 0x0000D910, .val = 0x00000000 },
         { .reg = 0x0000D914, .val = 0xF0800000 },
         { .reg = 0x0000DC40, .val = 0x00FF0000 },
         { .reg = 0x0000D940, .val = 0x00000004 },
         { .reg = 0x0000D944, .val = 0x0000FFFF },
         { .reg = 0x0000DC00, .val = 0x00000004 },
         { .reg = 0x0000DC04, .val = 0x0000FFFF },
         { .reg = 0x0000D948, .val = 0x00000003 },
         { .reg = 0x0000D94C, .val = 0x0000FFFF },
         { .reg = 0x0000DC08, .val = 0x00000003 },
         { .reg = 0x0000DC0C, .val = 0x0000FFFF },
         { .reg = 0x0000D950, .val = 0x00000007 },
         { .reg = 0x0000D954, .val = 0x0000FFFF },
         { .reg = 0x0000DC10, .val = 0x00000007 },
         { .reg = 0x0000DC14, .val = 0x0000FFFF },
         { .reg = 0x0000D958, .val = 0x00100002 },
         { .reg = 0x0000D95C, .val = 0x0000FFF7 },
         { .reg = 0x0000DC18, .val = 0x00100002 },
         { .reg = 0x0000DC1C, .val = 0x0000FFF7 },
         { .reg = 0x0000D960, .val = 0x00100002 },
         { .reg = 0x0000D964, .val = 0x0000FFCF },
         { .reg = 0x0000DC20, .val = 0x00100002 },
         { .reg = 0x0000DC24, .val = 0x0000FFCF },
         { .reg = 0x0000D968, .val = 0x00100082 },
         { .reg = 0x0000D96C, .val = 0x0000FFEF },
         { .reg = 0x0000DC28, .val = 0x00100082 },
         { .reg = 0x0000DC2C, .val = 0x0000FFEF },
         { .reg = 0x0000D970, .val = 0x001000C2 },
         { .reg = 0x0000D974, .val = 0x0000FFE7 },
         { .reg = 0x0000DC30, .val = 0x001000C2 },
         { .reg = 0x0000DC34, .val = 0x0000FFE7 },
         { .reg = 0x0000D978, .val = 0x00100001 },
         { .reg = 0x0000D97C, .val = 0x0000FFE7 },
         { .reg = 0x0000DC38, .val = 0x00100001 },
         { .reg = 0x0000DC3C, .val = 0x0000FFE7 },
      };
      query->config.b_counter_regs = b_counter_regs;
      query->config.n_b_counter_regs = ARRAY_SIZE(b_counter_regs);

      /* Flex register programming; a single entry for this metric set. */
      static const struct intel_perf_query_register_prog flex_regs[] = {
         { .reg = 0x0000E65C, .val = 0xFFFFFFFF },
      };
      query->config.flex_regs = flex_regs;
      query->config.n_flex_regs = ARRAY_SIZE(flex_regs);


      /* Counter descriptions. Each block claims the next slot in
       * query->counters (bumping n_counters) and fills it in. All 13
       * counters of this set are uint64 values in the "GPU" category and
       * are laid out 8 bytes apart, at offsets 0 through 96. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__gpu_time__read;
      counter->name = "GPU Time Elapsed";
      counter->desc = "Time elapsed on the GPU during the measurement. Unit: ns.";
      counter->symbol_name = "GpuTime";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_NS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 0;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__gpu_core_clocks__read;
      counter->name = "GPU Core Clocks";
      counter->desc = "The total number of GPU core clocks elapsed during the measurement. Unit: cycles.";
      counter->symbol_name = "GpuCoreClocks";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_CYCLES;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 8;

      /* The only counter in this set whose raw_max is computed at
       * registration time (from perf) instead of being left at 0. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__avg_gpu_core_frequency__read;
      counter->name = "AVG GPU Core Frequency";
      counter->desc = "Average GPU Core Frequency in the measurement. Unit: Hz.";
      counter->symbol_name = "AvgGpuCoreFrequency";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_HZ;
      counter->raw_max = adl__test_oa__avg_gpu_core_frequency__max(perf);
      counter->offset = 16;

      /* Hardware test counters 0..9 follow; they differ only in their read
       * callback, name/description strings and offset. */
      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter0__read;
      counter->name = "TestCounter0";
      counter->desc = "HW test counter 0. Factor: 0.0 Unit: events.";
      counter->symbol_name = "Counter0";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 24;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter1__read;
      counter->name = "TestCounter1";
      counter->desc = "HW test counter 1. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter1";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 32;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter2__read;
      counter->name = "TestCounter2";
      counter->desc = "HW test counter 2. Factor: 1.0 Unit: events.";
      counter->symbol_name = "Counter2";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 40;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter3__read;
      counter->name = "TestCounter3";
      counter->desc = "HW test counter 3. Factor: 0.5 Unit: events.";
      counter->symbol_name = "Counter3";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 48;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter4__read;
      counter->name = "TestCounter4";
      counter->desc = "HW test counter 4. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter4";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 56;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter5__read;
      counter->name = "TestCounter5";
      counter->desc = "HW test counter 5. Factor: 0.3333 Unit: events.";
      counter->symbol_name = "Counter5";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 64;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter6__read;
      counter->name = "TestCounter6";
      counter->desc = "HW test counter 6. Factor: 0.16666 Unit: events.";
      counter->symbol_name = "Counter6";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 72;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter7__read;
      counter->name = "TestCounter7";
      counter->desc = "HW test counter 7. Factor: 0.6666 Unit: events.";
      counter->symbol_name = "Counter7";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 80;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter8__read;
      counter->name = "TestCounter8";
      counter->desc = "HW test counter 8. Should be equal to 1 in IOStream or in OAG query mode Unit: events.";
      counter->symbol_name = "Counter8";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 88;

      counter = &query->counters[query->n_counters++];
      counter->oa_counter_read_uint64 = adl__test_oa__counter9__read;
      counter->name = "TestCounter9 - OAR enable";
      counter->desc = "HW test counter 9. Should be equal to 1 in query. Unit: events.";
      counter->symbol_name = "Counter9";
      counter->category = "GPU";
      counter->type = INTEL_PERF_COUNTER_TYPE_EVENT;
      counter->data_type = INTEL_PERF_COUNTER_DATA_TYPE_UINT64;
      counter->units = INTEL_PERF_COUNTER_UNITS_EVENTS;
      counter->raw_max = 0 /* undefined */;
      counter->offset = 96;

      /* `counter` still points at the last counter appended above, so the
       * total result size is the end of that counter's storage. */
      query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);
   }

   /* Publish the finished query under its GUID string. */
   _mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);
}

void
intel_oa_register_queries_adl(struct intel_perf_config *perf)
{
   adl_register_render_basic_counter_query(perf);
   adl_register_compute_basic_counter_query(perf);
   adl_register_render_pipe_profile_counter_query(perf);
   adl_register_hdc_and_sf_counter_query(perf);
   adl_register_rasterizer_and_pixel_backend_counter_query(perf);
   adl_register_l3_1_counter_query(perf);
   adl_register_l3_2_counter_query(perf);
   adl_register_l3_3_counter_query(perf);
   adl_register_l3_4_counter_query(perf);
   adl_register_l3_5_counter_query(perf);
   adl_register_l3_6_counter_query(perf);
   adl_register_sampler_1_counter_query(perf);
   adl_register_sampler_2_counter_query(perf);
   adl_register_tdl_1_counter_query(perf);
   adl_register_tdl_2_counter_query(perf);
   adl_register_tdl_3_counter_query(perf);
   adl_register_gpu_busyness_counter_query(perf);
   adl_register_eu_activity1_counter_query(perf);
   adl_register_eu_activity2_counter_query(perf);
   adl_register_eu_activity3_counter_query(perf);
   adl_register_eu_activity4_counter_query(perf);
   adl_register_eu_activity5_counter_query(perf);
   adl_register_eu_activity6_counter_query(perf);
   adl_register_eu_activity7_counter_query(perf);
   adl_register_eu_activity8_counter_query(perf);
   adl_register_test_oa_counter_query(perf);
}